Compare commits
415 Commits
feature/wo
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
09057b5756 | ||
|
|
8e2c2ad2c3 | ||
|
|
4a0ed6c80a | ||
|
|
caab7e2e5d | ||
|
|
5f5f5edf73 | ||
|
|
28f5ca8666 | ||
|
|
ee9d3bdef5 | ||
|
|
7485123390 | ||
|
|
1155236c74 | ||
|
|
0a26944374 | ||
|
|
239feaf069 | ||
|
|
a2324214e9 | ||
|
|
141f95e88e | ||
|
|
d7f2edf630 | ||
|
|
214cbaee7d | ||
|
|
15711d21fc | ||
|
|
4ab2679d55 | ||
|
|
3d2718ffbe | ||
|
|
3fa7987651 | ||
|
|
b6369d0191 | ||
|
|
69a013538c | ||
|
|
11088b87ff | ||
|
|
d1d58da0b2 | ||
|
|
93c8bc3598 | ||
|
|
c5ed50f1b3 | ||
|
|
ec2e942810 | ||
|
|
e8e7261409 | ||
|
|
1d254238f3 | ||
|
|
0b4ed48d2f | ||
|
|
87da7625cd | ||
|
|
9f3bc8a843 | ||
|
|
c33ed1cae9 | ||
|
|
38e7980cf4 | ||
|
|
887ce33b11 | ||
|
|
de239df314 | ||
|
|
6fcc64933a | ||
|
|
3488905ccc | ||
|
|
3ee09fbe84 | ||
|
|
7d65e0ae59 | ||
|
|
25f9118662 | ||
|
|
5c0de752af | ||
|
|
a90b10a1f7 | ||
|
|
75822ab67d | ||
|
|
df4d599478 | ||
|
|
4544718cad | ||
|
|
47da61ed71 | ||
|
|
e450d2e99e | ||
|
|
205a8b3159 | ||
|
|
8bd29d11bb | ||
|
|
4e7b3d2336 | ||
|
|
849123693a | ||
|
|
a1227f77b9 | ||
|
|
415e89a012 | ||
|
|
45844c6281 | ||
|
|
24c9586d81 | ||
|
|
f8d61446d5 | ||
|
|
0f859d1c75 | ||
|
|
52dc669782 | ||
|
|
2e47996354 | ||
|
|
f25d4eaf27 | ||
|
|
61a6be888c | ||
|
|
09c2b3a0e1 | ||
|
|
cec34198c7 | ||
|
|
3c10e07e45 | ||
|
|
3582c2e9e2 | ||
|
|
c6874977ee | ||
|
|
68430f5c22 | ||
|
|
ccefd325aa | ||
|
|
e119c5af53 | ||
|
|
e61224aaed | ||
|
|
7cf1b7643f | ||
|
|
74f813d68f | ||
|
|
f38f1024de | ||
|
|
358099c58a | ||
|
|
7fdcfc4fc4 | ||
|
|
541b461283 | ||
|
|
8f25cf10ab | ||
|
|
79e434212f | ||
|
|
600172eff6 | ||
|
|
4c12763fa1 | ||
|
|
2cb9a093f4 | ||
|
|
15ab40a820 | ||
|
|
2708fbe319 | ||
|
|
231d49e3e8 | ||
|
|
17defa046c | ||
|
|
d76a5fb3c5 | ||
|
|
f19fc59583 | ||
|
|
4c183c87a9 | ||
|
|
ffa05f89c4 | ||
|
|
9aa885211e | ||
|
|
b24b2fbc89 | ||
|
|
f7371de3d1 | ||
|
|
98970acf13 | ||
|
|
f0e636ac70 | ||
|
|
138ade07e1 | ||
|
|
728168e799 | ||
|
|
42c6bb7424 | ||
|
|
b32e847270 | ||
|
|
287627195c | ||
|
|
bfb965fa44 | ||
|
|
7bbc77a854 | ||
|
|
39ba522643 | ||
|
|
6ea4cd0d05 | ||
|
|
520cba9d31 | ||
|
|
331b6273ac | ||
|
|
d4a18cc3ce | ||
|
|
977803d862 | ||
|
|
48c640aae5 | ||
|
|
918a1c6b26 | ||
|
|
c7541ec2eb | ||
|
|
8676762d6b | ||
|
|
3f393ef77f | ||
|
|
a8360c7260 | ||
|
|
0979c9c37a | ||
|
|
b607fd7f44 | ||
|
|
bf988529eb | ||
|
|
04153a2efa | ||
|
|
a1a6876064 | ||
|
|
83466a03c3 | ||
|
|
35d6a17740 | ||
|
|
294d3db7a2 | ||
|
|
bbbd21ba94 | ||
|
|
3496be3064 | ||
|
|
af859a85f9 | ||
|
|
d3f5e4ef4b | ||
|
|
abef265ae9 | ||
|
|
b28a91fca5 | ||
|
|
60b221e7fb | ||
|
|
15cb657f13 | ||
|
|
f15920e508 | ||
|
|
9518ca48a5 | ||
|
|
3e9667571f | ||
|
|
8f6efd377b | ||
|
|
83e9718d78 | ||
|
|
f5cb17e1d4 | ||
|
|
f48a503e82 | ||
|
|
e7b392141a | ||
|
|
15a5a4239e | ||
|
|
20d7534b93 | ||
|
|
698995e46f | ||
|
|
1861e18396 | ||
|
|
eedc027ff6 | ||
|
|
ec5fcd9bc4 | ||
|
|
58150dafa6 | ||
|
|
06adab7225 | ||
|
|
38d7678a2e | ||
|
|
aac1181f3d | ||
|
|
4eaf7e50d7 | ||
|
|
4cb4e1c502 | ||
|
|
f0bb454ca2 | ||
|
|
b8bdc48c1e | ||
|
|
8173fd2845 | ||
|
|
3921e66933 | ||
|
|
ad79605961 | ||
|
|
6439de5cd4 | ||
|
|
b51ba17d32 | ||
|
|
2d631dfad0 | ||
|
|
072388ffb2 | ||
|
|
b456fe5097 | ||
|
|
eb5b2a876e | ||
|
|
ad09aadcc9 | ||
|
|
a020e31a46 | ||
|
|
83f629fec4 | ||
|
|
d810592bf2 | ||
|
|
d02c347ef6 | ||
|
|
d779a08bbf | ||
|
|
1490c60d2a | ||
|
|
ba15802a77 | ||
|
|
d8a22fba53 | ||
|
|
cf99ef9e09 | ||
|
|
3d0ea21007 | ||
|
|
023cfc127f | ||
|
|
5ea92e25af | ||
|
|
3b8171d94e | ||
|
|
d7da0b938d | ||
|
|
88e590d026 | ||
|
|
c215d11a84 | ||
|
|
59e0e45f8f | ||
|
|
e9a688fbb3 | ||
|
|
8b3ae40089 | ||
|
|
a8fec97bcb | ||
|
|
c969c7385b | ||
|
|
5084cb1a85 | ||
|
|
ec6843dfd6 | ||
|
|
268429b86c | ||
|
|
5c08135007 | ||
|
|
9f0d68d4c9 | ||
|
|
e11400566e | ||
|
|
987ed062d5 | ||
|
|
e50f54e621 | ||
|
|
983cd71fc2 | ||
|
|
7849ee0256 | ||
|
|
432842f442 | ||
|
|
94ebbb2497 | ||
|
|
e826a4dd3e | ||
|
|
b7e96359ef | ||
|
|
b1c1955082 | ||
|
|
95c23fcdff | ||
|
|
7067db68fc | ||
|
|
271faf0f00 | ||
|
|
291a8279bd | ||
|
|
b69d03c02f | ||
|
|
54f59c6082 | ||
|
|
c16c3083b1 | ||
|
|
656b00332e | ||
|
|
843f6ded75 | ||
|
|
0175a6817e | ||
|
|
24dd301d84 | ||
|
|
1d6211db19 | ||
|
|
e62f927218 | ||
|
|
675f42841e | ||
|
|
472dbdf418 | ||
|
|
5fcc03aff4 | ||
|
|
2d489e068b | ||
|
|
470097eb19 | ||
|
|
5af86edf83 | ||
|
|
55b26e9153 | ||
|
|
97bfdb9618 | ||
|
|
6f49c5e84a | ||
|
|
a6f09ee6e3 | ||
|
|
c62f8cbf06 | ||
|
|
e4e8438d8b | ||
|
|
822d2b0609 | ||
|
|
dfd36dacf8 | ||
|
|
4ea7139ed5 | ||
|
|
63023a4061 | ||
|
|
13a80e893e | ||
|
|
c98c409f59 | ||
|
|
6c8993f7bd | ||
|
|
92f88fdcd6 | ||
|
|
fd4a9b1434 | ||
|
|
832ef1cf83 | ||
|
|
b05eaceaf0 | ||
|
|
909470d3dc | ||
|
|
9a24b4896c | ||
|
|
dd8fce6e35 | ||
|
|
65b96d9cb9 | ||
|
|
f82eed4dc3 | ||
|
|
d997ec51a2 | ||
|
|
6490df9faf | ||
|
|
d86190912f | ||
|
|
a077f81c65 | ||
|
|
6bcadd9e71 | ||
|
|
a77bf8611a | ||
|
|
33feca3138 | ||
|
|
7d85a97b63 | ||
|
|
ce081effd4 | ||
|
|
daab0ae9b2 | ||
|
|
2ed088b4d8 | ||
|
|
d3c49fa246 | ||
|
|
52cb5014fd | ||
|
|
50654be910 | ||
|
|
cdab71a1ee | ||
|
|
a35976b9e9 | ||
|
|
c68210c485 | ||
|
|
f2864bd2ad | ||
|
|
eca9e85242 | ||
|
|
3f958fbff3 | ||
|
|
c84ef0396b | ||
|
|
e1c67dcee5 | ||
|
|
34c8a8cc67 | ||
|
|
6cd1f55119 | ||
|
|
e918234928 | ||
|
|
888a608485 | ||
|
|
b5c3b05246 | ||
|
|
fdce5e0302 | ||
|
|
4679b245de | ||
|
|
a837070f54 | ||
|
|
5a929e9803 | ||
|
|
52b0fad410 | ||
|
|
9944031eea | ||
|
|
2babaa7136 | ||
|
|
90567511dd | ||
|
|
beb16ad0cb | ||
|
|
fc7fc5ea85 | ||
|
|
ab8956b14b | ||
|
|
1d9c90641f | ||
|
|
6126b907f2 | ||
|
|
cc93d2d483 | ||
|
|
7642c17ec0 | ||
|
|
cb60dcf352 | ||
|
|
5ffe05d519 | ||
|
|
8e2f07c941 | ||
|
|
0b6e615075 | ||
|
|
be251c6fb3 | ||
|
|
efb1e89e33 | ||
|
|
529c447413 | ||
|
|
1eaf95c06b | ||
|
|
138ed17d8b | ||
|
|
a880c41d89 | ||
|
|
2a9ae61dce | ||
|
|
1f21911fa1 | ||
|
|
6f0a58f5d2 | ||
|
|
8206dce821 | ||
|
|
ced1afaa8a | ||
|
|
d6c602c567 | ||
|
|
a252a7fefd | ||
|
|
83b06c21cc | ||
|
|
f5214da54c | ||
|
|
e3d4dd0127 | ||
|
|
d0ee0d72f5 | ||
|
|
521f0550cd | ||
|
|
8a09691e91 | ||
|
|
459ad7d9c9 | ||
|
|
d102d27731 | ||
|
|
01810c40a1 | ||
|
|
b7d33e1cbf | ||
|
|
5b34b5a78c | ||
|
|
c091d2316b | ||
|
|
e8862b8a8b | ||
|
|
1b46ab699d | ||
|
|
ac1995f63f | ||
|
|
de93669652 | ||
|
|
dffc124920 | ||
|
|
932ceb0287 | ||
|
|
824d48fd85 | ||
|
|
47fdab0382 | ||
|
|
ed7ddc6375 | ||
|
|
cf06f4a8c0 | ||
|
|
a2fa21f65c | ||
|
|
61e915968f | ||
|
|
4949b22457 | ||
|
|
1fb0eb94c2 | ||
|
|
9aefb554bc | ||
|
|
a4338669a9 | ||
|
|
1fa9ea496c | ||
|
|
31756a2233 | ||
|
|
166583621b | ||
|
|
ca952c4674 | ||
|
|
4054778b6c | ||
|
|
56a5f00015 | ||
|
|
a96d50c481 | ||
|
|
4806212f46 | ||
|
|
2486f3c6b2 | ||
|
|
f25bebf6ee | ||
|
|
22dad6d0fc | ||
|
|
03eab66d35 | ||
|
|
97b1ab23d8 | ||
|
|
9fff0ba430 | ||
|
|
7d3e91b2e6 | ||
|
|
74957a9ec5 | ||
|
|
2d035c46cf | ||
|
|
53445fe72a | ||
|
|
37cc8956c5 | ||
|
|
197c82f921 | ||
|
|
2c52493a9c | ||
|
|
2ee2ba6b8c | ||
|
|
bafcf1694a | ||
|
|
95792aab15 | ||
|
|
38ae2c3a3e | ||
|
|
249d3c1b7f | ||
|
|
9647f94f89 | ||
|
|
afc288d2cf | ||
|
|
df01ce6aad | ||
|
|
aea93bc96b | ||
|
|
4e84f30f8b | ||
|
|
b20a0a4fa5 | ||
|
|
6eb1babc86 | ||
|
|
9a9c2f76a2 | ||
|
|
56cc171287 | ||
|
|
0295637ed6 | ||
|
|
9c6dd37316 | ||
|
|
524d13209a | ||
|
|
9199db3927 | ||
|
|
a0652c7c73 | ||
|
|
89c262ee20 | ||
|
|
7f9cf559cf | ||
|
|
bbe039c868 | ||
|
|
4e5c09a2a5 | ||
|
|
7f65598332 | ||
|
|
75315ed91e | ||
|
|
7fe7d17b43 | ||
|
|
7e517b5801 | ||
|
|
38ba9021d1 | ||
|
|
ddebad48d3 | ||
|
|
1cebf2e296 | ||
|
|
1d6e67d837 | ||
|
|
cfb4b6e4ce | ||
|
|
f418c403d6 | ||
|
|
be4221af46 | ||
|
|
ca07606b05 | ||
|
|
baf1bf2eb7 | ||
|
|
4ef3a8d72b | ||
|
|
09dd756eff | ||
|
|
ec8ef6210c | ||
|
|
a9b7a4d7a9 | ||
|
|
5119d5ccf9 | ||
|
|
91efd1d03d | ||
|
|
aa776226b0 | ||
|
|
e9435150e9 | ||
|
|
d399b966e6 | ||
|
|
f5f0e25384 | ||
|
|
04de33e5f7 | ||
|
|
37dfea25e1 | ||
|
|
e2166bc25f | ||
|
|
b5e8f039bf | ||
|
|
346e6d1cd8 | ||
|
|
be434d25e3 | ||
|
|
ecc201e9d4 | ||
|
|
67bfdf47a5 | ||
|
|
3fa22a6ba1 | ||
|
|
9f898f68db | ||
|
|
f78b05360a | ||
|
|
2f483b3084 | ||
|
|
9711d594db | ||
|
|
39aebfcb82 | ||
|
|
5415cac2f3 | ||
|
|
70d2364a6f | ||
|
|
b1ab45f662 | ||
|
|
20300edbb8 | ||
|
|
b7cfec0770 | ||
|
|
948a732dd5 | ||
|
|
bf4ceaf09e | ||
|
|
fda688b11a | ||
|
|
414b97b3c0 |
7
.gitignore
vendored
7
.gitignore
vendored
@@ -51,3 +51,10 @@ coverage/
|
|||||||
*.tmp
|
*.tmp
|
||||||
*.temp
|
*.temp
|
||||||
llm-scraper/
|
llm-scraper/
|
||||||
|
|
||||||
|
# Claude Code
|
||||||
|
.claude/
|
||||||
|
|
||||||
|
# Test/debug scripts
|
||||||
|
backend/scripts/test-*.ts
|
||||||
|
backend/scripts/run-*.ts
|
||||||
|
|||||||
189
.woodpecker.yml
Normal file
189
.woodpecker.yml
Normal file
@@ -0,0 +1,189 @@
|
|||||||
|
steps:
|
||||||
|
# ===========================================
|
||||||
|
# PR VALIDATION: Parallel type checks (PRs only)
|
||||||
|
# ===========================================
|
||||||
|
typecheck-backend:
|
||||||
|
image: node:22
|
||||||
|
commands:
|
||||||
|
- cd backend
|
||||||
|
- npm ci --prefer-offline
|
||||||
|
- npx tsc --noEmit
|
||||||
|
depends_on: []
|
||||||
|
when:
|
||||||
|
event: pull_request
|
||||||
|
|
||||||
|
typecheck-cannaiq:
|
||||||
|
image: node:22
|
||||||
|
commands:
|
||||||
|
- cd cannaiq
|
||||||
|
- npm ci --prefer-offline
|
||||||
|
- npx tsc --noEmit
|
||||||
|
depends_on: []
|
||||||
|
when:
|
||||||
|
event: pull_request
|
||||||
|
|
||||||
|
typecheck-findadispo:
|
||||||
|
image: node:22
|
||||||
|
commands:
|
||||||
|
- cd findadispo/frontend
|
||||||
|
- npm ci --prefer-offline
|
||||||
|
- npx tsc --noEmit 2>/dev/null || true
|
||||||
|
depends_on: []
|
||||||
|
when:
|
||||||
|
event: pull_request
|
||||||
|
|
||||||
|
typecheck-findagram:
|
||||||
|
image: node:22
|
||||||
|
commands:
|
||||||
|
- cd findagram/frontend
|
||||||
|
- npm ci --prefer-offline
|
||||||
|
- npx tsc --noEmit 2>/dev/null || true
|
||||||
|
depends_on: []
|
||||||
|
when:
|
||||||
|
event: pull_request
|
||||||
|
|
||||||
|
# ===========================================
|
||||||
|
# AUTO-MERGE: Merge PR after all checks pass
|
||||||
|
# ===========================================
|
||||||
|
auto-merge:
|
||||||
|
image: alpine:latest
|
||||||
|
environment:
|
||||||
|
GITEA_TOKEN:
|
||||||
|
from_secret: gitea_token
|
||||||
|
commands:
|
||||||
|
- apk add --no-cache curl
|
||||||
|
- |
|
||||||
|
echo "Merging PR #${CI_COMMIT_PULL_REQUEST}..."
|
||||||
|
curl -s -X POST \
|
||||||
|
-H "Authorization: token $GITEA_TOKEN" \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{"Do":"merge"}' \
|
||||||
|
"https://git.spdy.io/api/v1/repos/Creationshop/cannaiq/pulls/${CI_COMMIT_PULL_REQUEST}/merge"
|
||||||
|
depends_on:
|
||||||
|
- typecheck-backend
|
||||||
|
- typecheck-cannaiq
|
||||||
|
- typecheck-findadispo
|
||||||
|
- typecheck-findagram
|
||||||
|
when:
|
||||||
|
event: pull_request
|
||||||
|
|
||||||
|
# ===========================================
|
||||||
|
# DOCKER: Multi-stage builds with layer caching
|
||||||
|
# ===========================================
|
||||||
|
docker-backend:
|
||||||
|
image: gcr.io/kaniko-project/executor:debug
|
||||||
|
commands:
|
||||||
|
- /kaniko/executor
|
||||||
|
--context=/woodpecker/src/git.spdy.io/Creationshop/cannaiq/backend
|
||||||
|
--dockerfile=/woodpecker/src/git.spdy.io/Creationshop/cannaiq/backend/Dockerfile
|
||||||
|
--destination=registry.spdy.io/cannaiq/backend:latest
|
||||||
|
--destination=registry.spdy.io/cannaiq/backend:sha-${CI_COMMIT_SHA:0:8}
|
||||||
|
--build-arg=APP_BUILD_VERSION=sha-${CI_COMMIT_SHA:0:8}
|
||||||
|
--build-arg=APP_GIT_SHA=${CI_COMMIT_SHA}
|
||||||
|
--build-arg=APP_BUILD_TIME=${CI_PIPELINE_CREATED}
|
||||||
|
--cache=true
|
||||||
|
--cache-repo=registry.spdy.io/cannaiq/cache-backend
|
||||||
|
--cache-ttl=168h
|
||||||
|
depends_on: []
|
||||||
|
when:
|
||||||
|
branch: [master, develop]
|
||||||
|
event: push
|
||||||
|
|
||||||
|
docker-cannaiq:
|
||||||
|
image: gcr.io/kaniko-project/executor:debug
|
||||||
|
commands:
|
||||||
|
- /kaniko/executor
|
||||||
|
--context=/woodpecker/src/git.spdy.io/Creationshop/cannaiq/cannaiq
|
||||||
|
--dockerfile=/woodpecker/src/git.spdy.io/Creationshop/cannaiq/cannaiq/Dockerfile
|
||||||
|
--destination=registry.spdy.io/cannaiq/frontend:latest
|
||||||
|
--destination=registry.spdy.io/cannaiq/frontend:sha-${CI_COMMIT_SHA:0:8}
|
||||||
|
--cache=true
|
||||||
|
--cache-repo=registry.spdy.io/cannaiq/cache-cannaiq
|
||||||
|
--cache-ttl=168h
|
||||||
|
depends_on: []
|
||||||
|
when:
|
||||||
|
branch: [master, develop]
|
||||||
|
event: push
|
||||||
|
|
||||||
|
docker-findadispo:
|
||||||
|
image: gcr.io/kaniko-project/executor:debug
|
||||||
|
commands:
|
||||||
|
- /kaniko/executor
|
||||||
|
--context=/woodpecker/src/git.spdy.io/Creationshop/cannaiq/findadispo/frontend
|
||||||
|
--dockerfile=/woodpecker/src/git.spdy.io/Creationshop/cannaiq/findadispo/frontend/Dockerfile
|
||||||
|
--destination=registry.spdy.io/cannaiq/findadispo:latest
|
||||||
|
--destination=registry.spdy.io/cannaiq/findadispo:sha-${CI_COMMIT_SHA:0:8}
|
||||||
|
--cache=true
|
||||||
|
--cache-repo=registry.spdy.io/cannaiq/cache-findadispo
|
||||||
|
--cache-ttl=168h
|
||||||
|
depends_on: []
|
||||||
|
when:
|
||||||
|
branch: [master, develop]
|
||||||
|
event: push
|
||||||
|
|
||||||
|
docker-findagram:
|
||||||
|
image: gcr.io/kaniko-project/executor:debug
|
||||||
|
commands:
|
||||||
|
- /kaniko/executor
|
||||||
|
--context=/woodpecker/src/git.spdy.io/Creationshop/cannaiq/findagram/frontend
|
||||||
|
--dockerfile=/woodpecker/src/git.spdy.io/Creationshop/cannaiq/findagram/frontend/Dockerfile
|
||||||
|
--destination=registry.spdy.io/cannaiq/findagram:latest
|
||||||
|
--destination=registry.spdy.io/cannaiq/findagram:sha-${CI_COMMIT_SHA:0:8}
|
||||||
|
--cache=true
|
||||||
|
--cache-repo=registry.spdy.io/cannaiq/cache-findagram
|
||||||
|
--cache-ttl=168h
|
||||||
|
depends_on: []
|
||||||
|
when:
|
||||||
|
branch: [master, develop]
|
||||||
|
event: push
|
||||||
|
|
||||||
|
# ===========================================
|
||||||
|
# DEPLOY: Pull from local registry
|
||||||
|
# ===========================================
|
||||||
|
deploy:
|
||||||
|
image: bitnami/kubectl:latest
|
||||||
|
environment:
|
||||||
|
K8S_TOKEN:
|
||||||
|
from_secret: k8s_token
|
||||||
|
commands:
|
||||||
|
- mkdir -p ~/.kube
|
||||||
|
- |
|
||||||
|
cat > ~/.kube/config << KUBEEOF
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Config
|
||||||
|
clusters:
|
||||||
|
- cluster:
|
||||||
|
certificate-authority-data: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJkakNDQVIyZ0F3SUJBZ0lCQURBS0JnZ3Foa2pPUFFRREFqQWpNU0V3SHdZRFZRUUREQmhyTTNNdGMyVnkKZG1WeUxXTmhRREUzTmpVM05UUTNPRE13SGhjTk1qVXhNakUwTWpNeU5qSXpXaGNOTXpVeE1qRXlNak15TmpJegpXakFqTVNFd0h3WURWUVFEREJock0zTXRjMlZ5ZG1WeUxXTmhRREUzTmpVM05UUTNPRE13V1RBVEJnY3Foa2pPClBRSUJCZ2dxaGtqT1BRTUJCd05DQUFRWDRNdFJRTW5lWVJVV0s2cjZ3VEV2WjAxNnV4T3NUR3JJZ013TXVnNGwKajQ1bHZ6ZkM1WE1NY1pESnUxZ0t1dVJhVGxlb0xVOVJnSERIUUI4TUwzNTJvMEl3UURBT0JnTlZIUThCQWY4RQpCQU1DQXFRd0R3WURWUjBUQVFIL0JBVXdBd0VCL3pBZEJnTlZIUTRFRmdRVXIzNDZpNE42TFhzaEZsREhvSlU0CjJ1RjZseGN3Q2dZSUtvWkl6ajBFQXdJRFJ3QXdSQUlnVUtqdWRFQWJyS1JDVHROVXZTc1Rmb3FEaHFSeDM5MkYKTFFSVWlKK0hCVElDSUJqOFIxbG1zSnFSRkRHMEpwMGN4OG5ZZnFCaElRQzh6WWdRdTdBZmR4L3IKLS0tLS1FTkQgQ0VSVElGSUNBVEUtLS0tLQo=
|
||||||
|
server: https://10.100.6.10:6443
|
||||||
|
name: spdy-k3s
|
||||||
|
contexts:
|
||||||
|
- context:
|
||||||
|
cluster: spdy-k3s
|
||||||
|
namespace: cannaiq
|
||||||
|
user: cannaiq-admin
|
||||||
|
name: cannaiq
|
||||||
|
current-context: cannaiq
|
||||||
|
users:
|
||||||
|
- name: cannaiq-admin
|
||||||
|
user:
|
||||||
|
token: $K8S_TOKEN
|
||||||
|
KUBEEOF
|
||||||
|
- chmod 600 ~/.kube/config
|
||||||
|
# Apply manifests to ensure probes and resource limits are set
|
||||||
|
- kubectl apply -f /woodpecker/src/git.spdy.io/Creationshop/cannaiq/k8s/scraper.yaml
|
||||||
|
- kubectl apply -f /woodpecker/src/git.spdy.io/Creationshop/cannaiq/k8s/scraper-worker.yaml
|
||||||
|
- kubectl set image deployment/scraper scraper=registry.spdy.io/cannaiq/backend:sha-${CI_COMMIT_SHA:0:8} -n cannaiq
|
||||||
|
- kubectl rollout status deployment/scraper -n cannaiq --timeout=300s
|
||||||
|
- kubectl set image deployment/scraper-worker worker=registry.spdy.io/cannaiq/backend:sha-${CI_COMMIT_SHA:0:8} -n cannaiq
|
||||||
|
- kubectl set image deployment/cannaiq-frontend cannaiq-frontend=registry.spdy.io/cannaiq/frontend:sha-${CI_COMMIT_SHA:0:8} -n cannaiq
|
||||||
|
- kubectl set image deployment/findadispo-frontend findadispo-frontend=registry.spdy.io/cannaiq/findadispo:sha-${CI_COMMIT_SHA:0:8} -n cannaiq
|
||||||
|
- kubectl set image deployment/findagram-frontend findagram-frontend=registry.spdy.io/cannaiq/findagram:sha-${CI_COMMIT_SHA:0:8} -n cannaiq
|
||||||
|
- kubectl rollout status deployment/cannaiq-frontend -n cannaiq --timeout=300s
|
||||||
|
depends_on:
|
||||||
|
- docker-backend
|
||||||
|
- docker-cannaiq
|
||||||
|
- docker-findadispo
|
||||||
|
- docker-findagram
|
||||||
|
when:
|
||||||
|
branch: [master, develop]
|
||||||
|
event: push
|
||||||
@@ -1,140 +0,0 @@
|
|||||||
when:
|
|
||||||
- event: [push, pull_request]
|
|
||||||
|
|
||||||
steps:
|
|
||||||
# Build checks
|
|
||||||
typecheck-backend:
|
|
||||||
image: node:20
|
|
||||||
commands:
|
|
||||||
- cd backend
|
|
||||||
- npm ci
|
|
||||||
- npx tsc --noEmit || true
|
|
||||||
|
|
||||||
build-cannaiq:
|
|
||||||
image: node:20
|
|
||||||
commands:
|
|
||||||
- cd cannaiq
|
|
||||||
- npm ci
|
|
||||||
- npx tsc --noEmit
|
|
||||||
- npm run build
|
|
||||||
|
|
||||||
build-findadispo:
|
|
||||||
image: node:20
|
|
||||||
commands:
|
|
||||||
- cd findadispo/frontend
|
|
||||||
- npm ci
|
|
||||||
- npm run build
|
|
||||||
|
|
||||||
build-findagram:
|
|
||||||
image: node:20
|
|
||||||
commands:
|
|
||||||
- cd findagram/frontend
|
|
||||||
- npm ci
|
|
||||||
- npm run build
|
|
||||||
|
|
||||||
# Docker builds - only on master
|
|
||||||
docker-backend:
|
|
||||||
image: woodpeckerci/plugin-docker-buildx
|
|
||||||
settings:
|
|
||||||
registry: code.cannabrands.app
|
|
||||||
repo: code.cannabrands.app/creationshop/dispensary-scraper
|
|
||||||
tags:
|
|
||||||
- latest
|
|
||||||
- ${CI_COMMIT_SHA:0:8}
|
|
||||||
dockerfile: backend/Dockerfile
|
|
||||||
context: backend
|
|
||||||
username:
|
|
||||||
from_secret: registry_username
|
|
||||||
password:
|
|
||||||
from_secret: registry_password
|
|
||||||
platforms: linux/amd64
|
|
||||||
provenance: false
|
|
||||||
when:
|
|
||||||
branch: master
|
|
||||||
event: push
|
|
||||||
|
|
||||||
docker-cannaiq:
|
|
||||||
image: woodpeckerci/plugin-docker-buildx
|
|
||||||
settings:
|
|
||||||
registry: code.cannabrands.app
|
|
||||||
repo: code.cannabrands.app/creationshop/cannaiq-frontend
|
|
||||||
tags:
|
|
||||||
- latest
|
|
||||||
- ${CI_COMMIT_SHA:0:8}
|
|
||||||
dockerfile: cannaiq/Dockerfile
|
|
||||||
context: cannaiq
|
|
||||||
username:
|
|
||||||
from_secret: registry_username
|
|
||||||
password:
|
|
||||||
from_secret: registry_password
|
|
||||||
platforms: linux/amd64
|
|
||||||
provenance: false
|
|
||||||
when:
|
|
||||||
branch: master
|
|
||||||
event: push
|
|
||||||
|
|
||||||
docker-findadispo:
|
|
||||||
image: woodpeckerci/plugin-docker-buildx
|
|
||||||
settings:
|
|
||||||
registry: code.cannabrands.app
|
|
||||||
repo: code.cannabrands.app/creationshop/findadispo-frontend
|
|
||||||
tags:
|
|
||||||
- latest
|
|
||||||
- ${CI_COMMIT_SHA:0:8}
|
|
||||||
dockerfile: findadispo/frontend/Dockerfile
|
|
||||||
context: findadispo/frontend
|
|
||||||
username:
|
|
||||||
from_secret: registry_username
|
|
||||||
password:
|
|
||||||
from_secret: registry_password
|
|
||||||
platforms: linux/amd64
|
|
||||||
provenance: false
|
|
||||||
when:
|
|
||||||
branch: master
|
|
||||||
event: push
|
|
||||||
|
|
||||||
docker-findagram:
|
|
||||||
image: woodpeckerci/plugin-docker-buildx
|
|
||||||
settings:
|
|
||||||
registry: code.cannabrands.app
|
|
||||||
repo: code.cannabrands.app/creationshop/findagram-frontend
|
|
||||||
tags:
|
|
||||||
- latest
|
|
||||||
- ${CI_COMMIT_SHA:0:8}
|
|
||||||
dockerfile: findagram/frontend/Dockerfile
|
|
||||||
context: findagram/frontend
|
|
||||||
username:
|
|
||||||
from_secret: registry_username
|
|
||||||
password:
|
|
||||||
from_secret: registry_password
|
|
||||||
platforms: linux/amd64
|
|
||||||
provenance: false
|
|
||||||
when:
|
|
||||||
branch: master
|
|
||||||
event: push
|
|
||||||
|
|
||||||
# Deploy to Kubernetes
|
|
||||||
deploy:
|
|
||||||
image: bitnami/kubectl:latest
|
|
||||||
environment:
|
|
||||||
KUBECONFIG_CONTENT:
|
|
||||||
from_secret: kubeconfig_data
|
|
||||||
commands:
|
|
||||||
- echo "Deploying to Kubernetes..."
|
|
||||||
- mkdir -p ~/.kube
|
|
||||||
- echo "$KUBECONFIG_CONTENT" | tr -d '[:space:]' | base64 -d > ~/.kube/config
|
|
||||||
- chmod 600 ~/.kube/config
|
|
||||||
- kubectl set image deployment/scraper scraper=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
|
||||||
- kubectl set image deployment/scraper-worker scraper-worker=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
|
||||||
- kubectl set image deployment/cannaiq-frontend cannaiq-frontend=code.cannabrands.app/creationshop/cannaiq-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
|
||||||
- kubectl set image deployment/findadispo-frontend findadispo-frontend=code.cannabrands.app/creationshop/findadispo-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
|
||||||
- kubectl set image deployment/findagram-frontend findagram-frontend=code.cannabrands.app/creationshop/findagram-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
|
||||||
- kubectl rollout status deployment/scraper -n dispensary-scraper --timeout=300s
|
|
||||||
- kubectl rollout status deployment/scraper-worker -n dispensary-scraper --timeout=300s
|
|
||||||
- kubectl rollout status deployment/cannaiq-frontend -n dispensary-scraper --timeout=120s
|
|
||||||
- kubectl rollout status deployment/findadispo-frontend -n dispensary-scraper --timeout=120s
|
|
||||||
- kubectl rollout status deployment/findagram-frontend -n dispensary-scraper --timeout=120s
|
|
||||||
- echo "All deployments complete!"
|
|
||||||
when:
|
|
||||||
branch: master
|
|
||||||
event: push
|
|
||||||
40
backend/.env
40
backend/.env
@@ -1,30 +1,52 @@
|
|||||||
|
# CannaiQ Backend Environment Configuration
|
||||||
|
# Copy this file to .env and fill in the values
|
||||||
|
|
||||||
|
# Server
|
||||||
PORT=3010
|
PORT=3010
|
||||||
NODE_ENV=development
|
NODE_ENV=development
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# CannaiQ Database (dutchie_menus) - PRIMARY DATABASE
|
# CANNAIQ DATABASE (dutchie_menus) - PRIMARY DATABASE
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# This is where all schema migrations run and where canonical tables live.
|
# This is where ALL schema migrations run and where canonical tables live.
|
||||||
# All CANNAIQ_DB_* variables are REQUIRED - connection will fail if missing.
|
# All CANNAIQ_DB_* variables are REQUIRED - no defaults.
|
||||||
|
# The application will fail to start if any are missing.
|
||||||
|
|
||||||
CANNAIQ_DB_HOST=localhost
|
CANNAIQ_DB_HOST=localhost
|
||||||
CANNAIQ_DB_PORT=54320
|
CANNAIQ_DB_PORT=54320
|
||||||
CANNAIQ_DB_NAME=dutchie_menus
|
CANNAIQ_DB_NAME=dutchie_menus # MUST be dutchie_menus - NOT dutchie_legacy
|
||||||
CANNAIQ_DB_USER=dutchie
|
CANNAIQ_DB_USER=dutchie
|
||||||
CANNAIQ_DB_PASS=dutchie_local_pass
|
CANNAIQ_DB_PASS=dutchie_local_pass
|
||||||
|
|
||||||
|
# Alternative: Use a full connection URL instead of individual vars
|
||||||
|
# If set, this takes priority over individual vars above
|
||||||
|
# CANNAIQ_DB_URL=postgresql://user:pass@host:port/dutchie_menus
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# Legacy Database (dutchie_legacy) - READ-ONLY SOURCE
|
# LEGACY DATABASE (dutchie_legacy) - READ-ONLY FOR ETL
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# Used ONLY by ETL scripts to read historical data.
|
# Used ONLY by ETL scripts to read historical data.
|
||||||
# NEVER run migrations against this database.
|
# NEVER run migrations against this database.
|
||||||
|
# These are only needed when running 042_legacy_import.ts
|
||||||
|
|
||||||
LEGACY_DB_HOST=localhost
|
LEGACY_DB_HOST=localhost
|
||||||
LEGACY_DB_PORT=54320
|
LEGACY_DB_PORT=54320
|
||||||
LEGACY_DB_NAME=dutchie_legacy
|
LEGACY_DB_NAME=dutchie_legacy # READ-ONLY - never migrated
|
||||||
LEGACY_DB_USER=dutchie
|
LEGACY_DB_USER=dutchie
|
||||||
LEGACY_DB_PASS=dutchie_local_pass
|
LEGACY_DB_PASS=
|
||||||
|
|
||||||
# Local image storage (no MinIO per CLAUDE.md)
|
# Alternative: Use a full connection URL instead of individual vars
|
||||||
|
# LEGACY_DB_URL=postgresql://user:pass@host:port/dutchie_legacy
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# LOCAL STORAGE
|
||||||
|
# =============================================================================
|
||||||
|
# Local image storage path (no MinIO)
|
||||||
LOCAL_IMAGES_PATH=./public/images
|
LOCAL_IMAGES_PATH=./public/images
|
||||||
|
|
||||||
# JWT
|
# =============================================================================
|
||||||
|
# AUTHENTICATION
|
||||||
|
# =============================================================================
|
||||||
JWT_SECRET=your-secret-key-change-in-production
|
JWT_SECRET=your-secret-key-change-in-production
|
||||||
|
ANTHROPIC_API_KEY=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
|
||||||
|
OPENAI_API_KEY=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
|
||||||
|
|||||||
3
backend/.gitignore
vendored
Normal file
3
backend/.gitignore
vendored
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
|
||||||
|
# IP2Location database (downloaded separately)
|
||||||
|
data/ip2location/
|
||||||
@@ -1,17 +1,33 @@
|
|||||||
# Build stage
|
# Build stage
|
||||||
# Image: code.cannabrands.app/creationshop/dispensary-scraper
|
# Image: git.spdy.io/creationshop/dispensary-scraper
|
||||||
FROM node:20-slim AS builder
|
FROM node:22-slim AS builder
|
||||||
|
|
||||||
|
# Install build tools for native modules (bcrypt, sharp)
|
||||||
|
RUN apt-get update && apt-get install -y \
|
||||||
|
python3 \
|
||||||
|
build-essential \
|
||||||
|
--no-install-recommends \
|
||||||
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
COPY package*.json ./
|
COPY package*.json ./
|
||||||
RUN npm ci
|
|
||||||
|
# Install dependencies with retry and fallback registry
|
||||||
|
RUN npm config set fetch-retries 3 && \
|
||||||
|
npm config set fetch-retry-mintimeout 20000 && \
|
||||||
|
npm config set fetch-retry-maxtimeout 120000 && \
|
||||||
|
npm install || \
|
||||||
|
(npm config set registry https://registry.npmmirror.com && npm install)
|
||||||
|
|
||||||
COPY . .
|
COPY . .
|
||||||
RUN npm run build
|
RUN npm run build
|
||||||
|
|
||||||
|
# Prune dev dependencies for smaller production image
|
||||||
|
RUN npm prune --production
|
||||||
|
|
||||||
# Production stage
|
# Production stage
|
||||||
FROM node:20-slim
|
FROM node:22-slim
|
||||||
|
|
||||||
# Build arguments for version info
|
# Build arguments for version info
|
||||||
ARG APP_BUILD_VERSION=dev
|
ARG APP_BUILD_VERSION=dev
|
||||||
@@ -25,8 +41,10 @@ ENV APP_GIT_SHA=${APP_GIT_SHA}
|
|||||||
ENV APP_BUILD_TIME=${APP_BUILD_TIME}
|
ENV APP_BUILD_TIME=${APP_BUILD_TIME}
|
||||||
ENV CONTAINER_IMAGE_TAG=${CONTAINER_IMAGE_TAG}
|
ENV CONTAINER_IMAGE_TAG=${CONTAINER_IMAGE_TAG}
|
||||||
|
|
||||||
# Install Chromium dependencies
|
# Install Chromium dependencies, curl, and CA certificates for HTTPS
|
||||||
RUN apt-get update && apt-get install -y \
|
RUN apt-get update && apt-get install -y \
|
||||||
|
curl \
|
||||||
|
ca-certificates \
|
||||||
chromium \
|
chromium \
|
||||||
fonts-liberation \
|
fonts-liberation \
|
||||||
libnss3 \
|
libnss3 \
|
||||||
@@ -43,10 +61,12 @@ ENV PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium
|
|||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
COPY package*.json ./
|
COPY package*.json ./
|
||||||
RUN npm ci --omit=dev
|
COPY --from=builder /app/node_modules ./node_modules
|
||||||
|
|
||||||
COPY --from=builder /app/dist ./dist
|
COPY --from=builder /app/dist ./dist
|
||||||
|
|
||||||
|
# Copy migrations for auto-migrate on startup
|
||||||
|
COPY migrations ./migrations
|
||||||
|
|
||||||
# Create local images directory for when MinIO is not configured
|
# Create local images directory for when MinIO is not configured
|
||||||
RUN mkdir -p /app/public/images/products
|
RUN mkdir -p /app/public/images/products
|
||||||
|
|
||||||
|
|||||||
268
backend/docs/CODEBASE_MAP.md
Normal file
268
backend/docs/CODEBASE_MAP.md
Normal file
@@ -0,0 +1,268 @@
|
|||||||
|
# CannaiQ Backend Codebase Map
|
||||||
|
|
||||||
|
**Last Updated:** 2025-12-12
|
||||||
|
**Purpose:** Help Claude and developers understand which code is current vs deprecated
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Quick Reference: What to Use
|
||||||
|
|
||||||
|
### For Crawling/Scraping
|
||||||
|
| Task | Use This | NOT This |
|
||||||
|
|------|----------|----------|
|
||||||
|
| Fetch products | `src/tasks/handlers/payload-fetch.ts` | `src/hydration/*` |
|
||||||
|
| Process products | `src/tasks/handlers/product-refresh.ts` | `src/scraper-v2/*` |
|
||||||
|
| GraphQL client | `src/platforms/dutchie/client.ts` | `src/dutchie-az/services/graphql-client.ts` |
|
||||||
|
| Worker system | `src/tasks/task-worker.ts` | `src/dutchie-az/services/worker.ts` |
|
||||||
|
|
||||||
|
### For Database
|
||||||
|
| Task | Use This | NOT This |
|
||||||
|
|------|----------|----------|
|
||||||
|
| Get DB pool | `src/db/pool.ts` | `src/dutchie-az/db/connection.ts` |
|
||||||
|
| Run migrations | `src/db/migrate.ts` (CLI only) | Never import at runtime |
|
||||||
|
| Query products | `store_products` table | `products`, `dutchie_products` |
|
||||||
|
| Query stores | `dispensaries` table | `stores` table |
|
||||||
|
|
||||||
|
### For Discovery
|
||||||
|
| Task | Use This |
|
||||||
|
|------|----------|
|
||||||
|
| Discover stores | `src/discovery/*.ts` |
|
||||||
|
| Run discovery | `npx tsx src/scripts/run-discovery.ts` |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Directory Status
|
||||||
|
|
||||||
|
### ACTIVE DIRECTORIES (Use These)
|
||||||
|
|
||||||
|
```
|
||||||
|
src/
|
||||||
|
├── auth/ # JWT/session auth, middleware
|
||||||
|
├── db/ # Database pool, migrations
|
||||||
|
├── discovery/ # Dutchie store discovery pipeline
|
||||||
|
├── middleware/ # Express middleware
|
||||||
|
├── multi-state/ # Multi-state query support
|
||||||
|
├── platforms/ # Platform-specific clients (Dutchie, Jane, etc)
|
||||||
|
│ └── dutchie/ # THE Dutchie client - use this one
|
||||||
|
├── routes/ # Express API routes
|
||||||
|
├── services/ # Core services (logger, scheduler, etc)
|
||||||
|
├── tasks/ # Task system (workers, handlers, scheduler)
|
||||||
|
│ └── handlers/ # Task handlers (payload_fetch, product_refresh, etc)
|
||||||
|
├── types/ # TypeScript types
|
||||||
|
└── utils/ # Utilities (storage, image processing)
|
||||||
|
```
|
||||||
|
|
||||||
|
### DEPRECATED DIRECTORIES (DO NOT USE)
|
||||||
|
|
||||||
|
```
|
||||||
|
src/
|
||||||
|
├── hydration/ # DEPRECATED - Old pipeline approach
|
||||||
|
├── scraper-v2/ # DEPRECATED - Old scraper engine
|
||||||
|
├── canonical-hydration/# DEPRECATED - Merged into tasks/handlers
|
||||||
|
├── dutchie-az/ # PARTIAL - Some parts deprecated, some active
|
||||||
|
│ ├── db/ # DEPRECATED - Use src/db/pool.ts
|
||||||
|
│ └── services/ # PARTIAL - worker.ts still runs, graphql-client.ts deprecated
|
||||||
|
├── portals/ # FUTURE - Not yet implemented
|
||||||
|
├── seo/ # PARTIAL - Settings work, templates WIP
|
||||||
|
└── system/ # DEPRECATED - Old orchestration system
|
||||||
|
```
|
||||||
|
|
||||||
|
### DEPRECATED FILES (DO NOT USE)
|
||||||
|
|
||||||
|
```
|
||||||
|
src/dutchie-az/db/connection.ts # Use src/db/pool.ts instead
|
||||||
|
src/dutchie-az/services/graphql-client.ts # Use src/platforms/dutchie/client.ts
|
||||||
|
src/hydration/*.ts # Entire directory deprecated
|
||||||
|
src/scraper-v2/*.ts # Entire directory deprecated
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Key Files Reference
|
||||||
|
|
||||||
|
### Entry Points
|
||||||
|
| File | Purpose | Status |
|
||||||
|
|------|---------|--------|
|
||||||
|
| `src/index.ts` | Main Express server | ACTIVE |
|
||||||
|
| `src/dutchie-az/services/worker.ts` | Worker process entry | ACTIVE |
|
||||||
|
| `src/tasks/task-worker.ts` | Task worker (new system) | ACTIVE |
|
||||||
|
|
||||||
|
### Dutchie Integration
|
||||||
|
| File | Purpose | Status |
|
||||||
|
|------|---------|--------|
|
||||||
|
| `src/platforms/dutchie/client.ts` | GraphQL client, hashes, curl | **PRIMARY** |
|
||||||
|
| `src/platforms/dutchie/queries.ts` | High-level query functions | ACTIVE |
|
||||||
|
| `src/platforms/dutchie/index.ts` | Re-exports | ACTIVE |
|
||||||
|
|
||||||
|
### Task Handlers
|
||||||
|
| File | Purpose | Status |
|
||||||
|
|------|---------|--------|
|
||||||
|
| `src/tasks/handlers/payload-fetch.ts` | Fetch products from Dutchie | **PRIMARY** |
|
||||||
|
| `src/tasks/handlers/product-refresh.ts` | Process payload into DB | **PRIMARY** |
|
||||||
|
| `src/tasks/handlers/entry-point-discovery.ts` | Resolve platform IDs (auto-healing) | **PRIMARY** |
|
||||||
|
| `src/tasks/handlers/menu-detection.ts` | Detect menu type | ACTIVE |
|
||||||
|
| `src/tasks/handlers/id-resolution.ts` | Resolve platform IDs (legacy) | LEGACY |
|
||||||
|
| `src/tasks/handlers/image-download.ts` | Download product images | ACTIVE |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Transport Rules (CRITICAL)
|
||||||
|
|
||||||
|
**Browser-based (Puppeteer) is the DEFAULT transport. curl is ONLY allowed when explicitly specified.**
|
||||||
|
|
||||||
|
### Transport Selection
|
||||||
|
| `task.method` | Transport Used | Notes |
|
||||||
|
|---------------|----------------|-------|
|
||||||
|
| `null` | Browser (Puppeteer) | DEFAULT - use this for most tasks |
|
||||||
|
| `'http'` | Browser (Puppeteer) | Explicit browser request |
|
||||||
|
| `'curl'` | curl-impersonate | ONLY when explicitly needed |
|
||||||
|
|
||||||
|
### Why Browser-First?
|
||||||
|
1. **Anti-detection**: Puppeteer with StealthPlugin evades bot detection
|
||||||
|
2. **Session cookies**: Browser maintains session state automatically
|
||||||
|
3. **Fingerprinting**: Real browser fingerprint (TLS, headers, etc.)
|
||||||
|
4. **Age gates**: Browser can click through age verification
|
||||||
|
|
||||||
|
### Entry Point Discovery Auto-Healing
|
||||||
|
The `entry_point_discovery` handler uses a healing strategy:
|
||||||
|
|
||||||
|
```
|
||||||
|
1. FIRST: Check dutchie_discovery_locations for existing platform_location_id
|
||||||
|
- By linked dutchie_discovery_id
|
||||||
|
- By slug match in discovery data
|
||||||
|
→ If found, NO network call needed
|
||||||
|
|
||||||
|
2. SECOND: Browser-based GraphQL (Puppeteer)
|
||||||
|
- 5x retries for network/proxy failures
|
||||||
|
- On HTTP 403: rotate proxy and retry
|
||||||
|
- On HTTP 404 after 2 attempts: mark as 'removed'
|
||||||
|
|
||||||
|
3. HARD FAILURE: After exhausting options → 'needs_investigation'
|
||||||
|
```
|
||||||
|
|
||||||
|
### DO NOT Use curl Unless:
|
||||||
|
- Task explicitly has `method = 'curl'`
|
||||||
|
- You're testing curl-impersonate binaries
|
||||||
|
- The API explicitly requires curl fingerprinting
|
||||||
|
|
||||||
|
### Files
|
||||||
|
| File | Transport | Purpose |
|
||||||
|
|------|-----------|---------|
|
||||||
|
| `src/services/puppeteer-preflight.ts` | Browser | Preflight check |
|
||||||
|
| `src/services/curl-preflight.ts` | curl | Preflight check |
|
||||||
|
| `src/tasks/handlers/entry-point-discovery.ts` | Browser | Platform ID resolution |
|
||||||
|
| `src/tasks/handlers/payload-fetch.ts` | Both | Product fetching |
|
||||||
|
|
||||||
|
### Database
|
||||||
|
| File | Purpose | Status |
|
||||||
|
|------|---------|--------|
|
||||||
|
| `src/db/pool.ts` | Canonical DB pool | **PRIMARY** |
|
||||||
|
| `src/db/migrate.ts` | Migration runner (CLI only) | CLI ONLY |
|
||||||
|
| `src/db/auto-migrate.ts` | Auto-run migrations on startup | ACTIVE |
|
||||||
|
|
||||||
|
### Configuration
|
||||||
|
| File | Purpose | Status |
|
||||||
|
|------|---------|--------|
|
||||||
|
| `.env` | Environment variables | ACTIVE |
|
||||||
|
| `package.json` | Dependencies | ACTIVE |
|
||||||
|
| `tsconfig.json` | TypeScript config | ACTIVE |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## GraphQL Hashes (CRITICAL)
|
||||||
|
|
||||||
|
The correct hashes are in `src/platforms/dutchie/client.ts`:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
export const GRAPHQL_HASHES = {
|
||||||
|
FilteredProducts: 'ee29c060826dc41c527e470e9ae502c9b2c169720faa0a9f5d25e1b9a530a4a0',
|
||||||
|
GetAddressBasedDispensaryData: '13461f73abf7268770dfd05fe7e10c523084b2bb916a929c08efe3d87531977b',
|
||||||
|
ConsumerDispensaries: '0a5bfa6ca1d64ae47bcccb7c8077c87147cbc4e6982c17ceec97a2a4948b311b',
|
||||||
|
GetAllCitiesByState: 'ae547a0466ace5a48f91e55bf6699eacd87e3a42841560f0c0eabed5a0a920e6',
|
||||||
|
};
|
||||||
|
```
|
||||||
|
|
||||||
|
**ALWAYS** use `Status: 'Active'` for FilteredProducts (not `null` or `'All'`).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Scripts Reference
|
||||||
|
|
||||||
|
### Useful Scripts (in `src/scripts/`)
|
||||||
|
| Script | Purpose |
|
||||||
|
|--------|---------|
|
||||||
|
| `run-discovery.ts` | Run Dutchie discovery |
|
||||||
|
| `crawl-single-store.ts` | Test crawl a single store |
|
||||||
|
| `test-dutchie-graphql.ts` | Test GraphQL queries |
|
||||||
|
|
||||||
|
### One-Off Scripts (probably don't need)
|
||||||
|
| Script | Purpose |
|
||||||
|
|--------|---------|
|
||||||
|
| `harmonize-az-dispensaries.ts` | One-time data cleanup |
|
||||||
|
| `bootstrap-stores-for-dispensaries.ts` | One-time migration |
|
||||||
|
| `backfill-*.ts` | Historical backfill scripts |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## API Routes
|
||||||
|
|
||||||
|
### Active Routes (in `src/routes/`)
|
||||||
|
| Route File | Mount Point | Purpose |
|
||||||
|
|------------|-------------|---------|
|
||||||
|
| `auth.ts` | `/api/auth` | Login/logout/session |
|
||||||
|
| `stores.ts` | `/api/stores` | Store CRUD |
|
||||||
|
| `dashboard.ts` | `/api/dashboard` | Dashboard stats |
|
||||||
|
| `workers.ts` | `/api/workers` | Worker monitoring |
|
||||||
|
| `pipeline.ts` | `/api/pipeline` | Crawl triggers |
|
||||||
|
| `discovery.ts` | `/api/discovery` | Discovery management |
|
||||||
|
| `analytics.ts` | `/api/analytics` | Analytics queries |
|
||||||
|
| `wordpress.ts` | `/api/v1/wordpress` | WordPress plugin API |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Documentation Files
|
||||||
|
|
||||||
|
### Current Docs (in `backend/docs/`)
|
||||||
|
| Doc | Purpose | Currency |
|
||||||
|
|-----|---------|----------|
|
||||||
|
| `TASK_WORKFLOW_2024-12-10.md` | Task system architecture | CURRENT |
|
||||||
|
| `WORKER_TASK_ARCHITECTURE.md` | Worker/task design | CURRENT |
|
||||||
|
| `CRAWL_PIPELINE.md` | Crawl pipeline overview | CURRENT |
|
||||||
|
| `ORGANIC_SCRAPING_GUIDE.md` | Browser-based scraping | CURRENT |
|
||||||
|
| `CODEBASE_MAP.md` | This file | CURRENT |
|
||||||
|
| `ANALYTICS_V2_EXAMPLES.md` | Analytics API examples | CURRENT |
|
||||||
|
| `BRAND_INTELLIGENCE_API.md` | Brand API docs | CURRENT |
|
||||||
|
|
||||||
|
### Root Docs
|
||||||
|
| Doc | Purpose | Currency |
|
||||||
|
|-----|---------|----------|
|
||||||
|
| `CLAUDE.md` | Claude instructions | **PRIMARY** |
|
||||||
|
| `README.md` | Project overview | NEEDS UPDATE |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Common Mistakes to Avoid
|
||||||
|
|
||||||
|
1. **Don't use `src/hydration/`** - It's an old approach that was superseded by the task system
|
||||||
|
|
||||||
|
2. **Don't use `src/dutchie-az/db/connection.ts`** - Use `src/db/pool.ts` instead
|
||||||
|
|
||||||
|
3. **Don't import `src/db/migrate.ts` at runtime** - It will crash. Only use for CLI migrations.
|
||||||
|
|
||||||
|
4. **Don't query `stores` table** - It's empty. Use `dispensaries`.
|
||||||
|
|
||||||
|
5. **Don't query `products` table** - It's empty. Use `store_products`.
|
||||||
|
|
||||||
|
6. **Don't use wrong GraphQL hash** - Always get hash from `GRAPHQL_HASHES` in client.ts
|
||||||
|
|
||||||
|
7. **Don't use `Status: null`** - It returns 0 products. Use `Status: 'Active'`.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## When in Doubt
|
||||||
|
|
||||||
|
1. Check if the file is imported in `src/index.ts` - if not, it may be deprecated
|
||||||
|
2. Check the last modified date - older files may be stale
|
||||||
|
3. Look for `DEPRECATED` comments in the code
|
||||||
|
4. Ask: "Is there a newer version of this in `src/tasks/` or `src/platforms/`?"
|
||||||
|
5. Read the relevant doc in `docs/` before modifying code
|
||||||
343
backend/docs/QUERY_API.md
Normal file
343
backend/docs/QUERY_API.md
Normal file
@@ -0,0 +1,343 @@
|
|||||||
|
# CannaiQ Query API
|
||||||
|
|
||||||
|
Query raw crawl payload data with flexible filters, sorting, and aggregation.
|
||||||
|
|
||||||
|
## Base URL
|
||||||
|
|
||||||
|
```
|
||||||
|
https://cannaiq.co/api/payloads
|
||||||
|
```
|
||||||
|
|
||||||
|
## Authentication
|
||||||
|
|
||||||
|
Include your API key in the header:
|
||||||
|
```
|
||||||
|
X-API-Key: your-api-key
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Endpoints
|
||||||
|
|
||||||
|
### 1. Query Products
|
||||||
|
|
||||||
|
Filter and search products from a store's latest crawl data.
|
||||||
|
|
||||||
|
```
|
||||||
|
GET /api/payloads/store/{dispensaryId}/query
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Query Parameters
|
||||||
|
|
||||||
|
| Parameter | Type | Description |
|
||||||
|
|-----------|------|-------------|
|
||||||
|
| `brand` | string | Filter by brand name (partial match) |
|
||||||
|
| `category` | string | Filter by category (flower, vape, edible, etc.) |
|
||||||
|
| `subcategory` | string | Filter by subcategory |
|
||||||
|
| `strain_type` | string | Filter by strain (indica, sativa, hybrid, cbd) |
|
||||||
|
| `in_stock` | boolean | Filter by stock status (true/false) |
|
||||||
|
| `price_min` | number | Minimum price |
|
||||||
|
| `price_max` | number | Maximum price |
|
||||||
|
| `thc_min` | number | Minimum THC percentage |
|
||||||
|
| `thc_max` | number | Maximum THC percentage |
|
||||||
|
| `search` | string | Search product name (partial match) |
|
||||||
|
| `fields` | string | Comma-separated fields to return |
|
||||||
|
| `limit` | number | Max results (default 100, max 1000) |
|
||||||
|
| `offset` | number | Skip results for pagination |
|
||||||
|
| `sort` | string | Sort by: name, price, thc, brand |
|
||||||
|
| `order` | string | Sort order: asc, desc |
|
||||||
|
|
||||||
|
#### Available Fields
|
||||||
|
|
||||||
|
When using `fields` parameter, you can request:
|
||||||
|
- `id` - Product ID
|
||||||
|
- `name` - Product name
|
||||||
|
- `brand` - Brand name
|
||||||
|
- `category` - Product category
|
||||||
|
- `subcategory` - Product subcategory
|
||||||
|
- `strain_type` - Indica/Sativa/Hybrid/CBD
|
||||||
|
- `price` - Current price
|
||||||
|
- `price_med` - Medical price
|
||||||
|
- `price_rec` - Recreational price
|
||||||
|
- `thc` - THC percentage
|
||||||
|
- `cbd` - CBD percentage
|
||||||
|
- `weight` - Product weight/size
|
||||||
|
- `status` - Stock status
|
||||||
|
- `in_stock` - Boolean in-stock flag
|
||||||
|
- `image_url` - Product image
|
||||||
|
- `description` - Product description
|
||||||
|
|
||||||
|
#### Examples
|
||||||
|
|
||||||
|
**Get all flower products under $40:**
|
||||||
|
```
|
||||||
|
GET /api/payloads/store/112/query?category=flower&price_max=40
|
||||||
|
```
|
||||||
|
|
||||||
|
**Search for "Blue Dream" with high THC:**
|
||||||
|
```
|
||||||
|
GET /api/payloads/store/112/query?search=blue+dream&thc_min=20
|
||||||
|
```
|
||||||
|
|
||||||
|
**Get only name and price for Alien Labs products:**
|
||||||
|
```
|
||||||
|
GET /api/payloads/store/112/query?brand=Alien+Labs&fields=name,price,thc
|
||||||
|
```
|
||||||
|
|
||||||
|
**Get top 10 highest THC products:**
|
||||||
|
```
|
||||||
|
GET /api/payloads/store/112/query?sort=thc&order=desc&limit=10
|
||||||
|
```
|
||||||
|
|
||||||
|
**Paginate through in-stock products:**
|
||||||
|
```
|
||||||
|
GET /api/payloads/store/112/query?in_stock=true&limit=50&offset=0
|
||||||
|
GET /api/payloads/store/112/query?in_stock=true&limit=50&offset=50
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Response
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"success": true,
|
||||||
|
"dispensaryId": 112,
|
||||||
|
"payloadId": 45,
|
||||||
|
"fetchedAt": "2025-12-11T10:30:00Z",
|
||||||
|
"query": {
|
||||||
|
"filters": {
|
||||||
|
"brand": "Alien Labs",
|
||||||
|
"category": null,
|
||||||
|
"price_max": null
|
||||||
|
},
|
||||||
|
"sort": "price",
|
||||||
|
"order": "asc",
|
||||||
|
"limit": 100,
|
||||||
|
"offset": 0
|
||||||
|
},
|
||||||
|
"pagination": {
|
||||||
|
"total": 15,
|
||||||
|
"returned": 15,
|
||||||
|
"limit": 100,
|
||||||
|
"offset": 0,
|
||||||
|
"has_more": false
|
||||||
|
},
|
||||||
|
"products": [
|
||||||
|
{
|
||||||
|
"id": "507f1f77bcf86cd799439011",
|
||||||
|
"name": "Alien Labs - Baklava 3.5g",
|
||||||
|
"brand": "Alien Labs",
|
||||||
|
"category": "flower",
|
||||||
|
"strain_type": "hybrid",
|
||||||
|
"price": 55,
|
||||||
|
"thc": "28.5",
|
||||||
|
"in_stock": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 2. Aggregate Data
|
||||||
|
|
||||||
|
Group products and calculate metrics.
|
||||||
|
|
||||||
|
```
|
||||||
|
GET /api/payloads/store/{dispensaryId}/aggregate
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Query Parameters
|
||||||
|
|
||||||
|
| Parameter | Type | Description |
|
||||||
|
|-----------|------|-------------|
|
||||||
|
| `group_by` | string | **Required.** Field to group by: brand, category, subcategory, strain_type |
|
||||||
|
| `metrics` | string | Comma-separated metrics (default: count) |
|
||||||
|
|
||||||
|
#### Available Metrics
|
||||||
|
|
||||||
|
- `count` - Number of products
|
||||||
|
- `avg_price` - Average price
|
||||||
|
- `min_price` - Lowest price
|
||||||
|
- `max_price` - Highest price
|
||||||
|
- `avg_thc` - Average THC percentage
|
||||||
|
- `in_stock_count` - Number of in-stock products
|
||||||
|
|
||||||
|
#### Examples
|
||||||
|
|
||||||
|
**Count products by brand:**
|
||||||
|
```
|
||||||
|
GET /api/payloads/store/112/aggregate?group_by=brand
|
||||||
|
```
|
||||||
|
|
||||||
|
**Get price stats by category:**
|
||||||
|
```
|
||||||
|
GET /api/payloads/store/112/aggregate?group_by=category&metrics=count,avg_price,min_price,max_price
|
||||||
|
```
|
||||||
|
|
||||||
|
**Get THC averages by strain type:**
|
||||||
|
```
|
||||||
|
GET /api/payloads/store/112/aggregate?group_by=strain_type&metrics=count,avg_thc
|
||||||
|
```
|
||||||
|
|
||||||
|
**Brand analysis with stock info:**
|
||||||
|
```
|
||||||
|
GET /api/payloads/store/112/aggregate?group_by=brand&metrics=count,avg_price,in_stock_count
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Response
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"success": true,
|
||||||
|
"dispensaryId": 112,
|
||||||
|
"payloadId": 45,
|
||||||
|
"fetchedAt": "2025-12-11T10:30:00Z",
|
||||||
|
"groupBy": "brand",
|
||||||
|
"metrics": ["count", "avg_price"],
|
||||||
|
"totalProducts": 450,
|
||||||
|
"groupCount": 85,
|
||||||
|
"aggregations": [
|
||||||
|
{
|
||||||
|
"brand": "Alien Labs",
|
||||||
|
"count": 15,
|
||||||
|
"avg_price": 52.33
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"brand": "Connected",
|
||||||
|
"count": 12,
|
||||||
|
"avg_price": 48.50
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 3. Compare Stores (Price Comparison)
|
||||||
|
|
||||||
|
Query the same data from multiple stores and compare in your app:
|
||||||
|
|
||||||
|
```javascript
|
||||||
|
// Get flower prices from Store A
|
||||||
|
const storeA = await fetch('/api/payloads/store/112/query?category=flower&fields=name,brand,price');
|
||||||
|
|
||||||
|
// Get flower prices from Store B
|
||||||
|
const storeB = await fetch('/api/payloads/store/115/query?category=flower&fields=name,brand,price');
|
||||||
|
|
||||||
|
// Compare in your app
|
||||||
|
const dataA = await storeA.json();
|
||||||
|
const dataB = await storeB.json();
|
||||||
|
|
||||||
|
// Find matching products and compare prices
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 4. Price History
|
||||||
|
|
||||||
|
For historical price data, use the snapshots endpoint:
|
||||||
|
|
||||||
|
```
|
||||||
|
GET /api/v1/products/{productId}/history?days=30
|
||||||
|
```
|
||||||
|
|
||||||
|
Or compare payloads over time:
|
||||||
|
|
||||||
|
```
|
||||||
|
GET /api/payloads/store/{dispensaryId}/diff?from={payloadId1}&to={payloadId2}
|
||||||
|
```
|
||||||
|
|
||||||
|
The diff endpoint shows:
|
||||||
|
- Products added
|
||||||
|
- Products removed
|
||||||
|
- Price changes
|
||||||
|
- Stock changes
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 5. List Stores
|
||||||
|
|
||||||
|
Get available dispensaries to query:
|
||||||
|
|
||||||
|
```
|
||||||
|
GET /api/stores
|
||||||
|
```
|
||||||
|
|
||||||
|
Returns all stores with their IDs, names, and locations.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Use Cases
|
||||||
|
|
||||||
|
### Price Comparison App
|
||||||
|
|
||||||
|
```javascript
|
||||||
|
// 1. Get stores in Arizona
|
||||||
|
const stores = await fetch('/api/stores?state=AZ').then(r => r.json());
|
||||||
|
|
||||||
|
// 2. Query flower prices from each store
|
||||||
|
const prices = await Promise.all(
|
||||||
|
stores.map(store =>
|
||||||
|
fetch(`/api/payloads/store/${store.id}/query?category=flower&fields=name,brand,price`)
|
||||||
|
.then(r => r.json())
|
||||||
|
)
|
||||||
|
);
|
||||||
|
|
||||||
|
// 3. Build comparison matrix in your app
|
||||||
|
```
|
||||||
|
|
||||||
|
### Brand Analytics Dashboard
|
||||||
|
|
||||||
|
```javascript
|
||||||
|
// Get brand presence across stores
|
||||||
|
const brandData = await Promise.all(
|
||||||
|
storeIds.map(id =>
|
||||||
|
fetch(`/api/payloads/store/${id}/aggregate?group_by=brand&metrics=count,avg_price`)
|
||||||
|
.then(r => r.json())
|
||||||
|
)
|
||||||
|
);
|
||||||
|
|
||||||
|
// Aggregate brand presence across all stores
|
||||||
|
```
|
||||||
|
|
||||||
|
### Deal Finder
|
||||||
|
|
||||||
|
```javascript
|
||||||
|
// Find high-THC flower under $30
|
||||||
|
const deals = await fetch(
|
||||||
|
'/api/payloads/store/112/query?category=flower&price_max=30&thc_min=20&in_stock=true&sort=thc&order=desc'
|
||||||
|
).then(r => r.json());
|
||||||
|
```
|
||||||
|
|
||||||
|
### Inventory Tracker
|
||||||
|
|
||||||
|
```javascript
|
||||||
|
// Get products that went out of stock
|
||||||
|
const diff = await fetch('/api/payloads/store/112/diff').then(r => r.json());
|
||||||
|
|
||||||
|
const outOfStock = diff.details.stockChanges.filter(
|
||||||
|
p => p.newStatus !== 'Active'
|
||||||
|
);
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Rate Limits
|
||||||
|
|
||||||
|
- Default: 100 requests/minute per API key
|
||||||
|
- Contact support for higher limits
|
||||||
|
|
||||||
|
## Error Responses
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"success": false,
|
||||||
|
"error": "Error message here"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Common errors:
|
||||||
|
- `404` - Store or payload not found
|
||||||
|
- `400` - Missing required parameter
|
||||||
|
- `401` - Invalid or missing API key
|
||||||
|
- `429` - Rate limit exceeded
|
||||||
394
backend/docs/_archive/BRAND_INTELLIGENCE_API.md
Normal file
394
backend/docs/_archive/BRAND_INTELLIGENCE_API.md
Normal file
@@ -0,0 +1,394 @@
|
|||||||
|
# Brand Intelligence API
|
||||||
|
|
||||||
|
## Endpoint
|
||||||
|
|
||||||
|
```
|
||||||
|
GET /api/analytics/v2/brand/:name/intelligence
|
||||||
|
```
|
||||||
|
|
||||||
|
## Query Parameters
|
||||||
|
|
||||||
|
| Param | Type | Default | Description |
|
||||||
|
|-------|------|---------|-------------|
|
||||||
|
| `window` | `7d\|30d\|90d` | `30d` | Time window for trend calculations |
|
||||||
|
| `state` | string | - | Filter by state code (e.g., `AZ`) |
|
||||||
|
| `category` | string | - | Filter by category (e.g., `Flower`) |
|
||||||
|
|
||||||
|
## Response Payload Schema
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
interface BrandIntelligenceResult {
|
||||||
|
brand_name: string;
|
||||||
|
window: '7d' | '30d' | '90d';
|
||||||
|
generated_at: string; // ISO timestamp when data was computed
|
||||||
|
|
||||||
|
performance_snapshot: PerformanceSnapshot;
|
||||||
|
alerts: Alerts;
|
||||||
|
sku_performance: SkuPerformance[];
|
||||||
|
retail_footprint: RetailFootprint;
|
||||||
|
competitive_landscape: CompetitiveLandscape;
|
||||||
|
inventory_health: InventoryHealth;
|
||||||
|
promo_performance: PromoPerformance;
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Section 1: Performance Snapshot
|
||||||
|
|
||||||
|
Summary cards with key brand metrics.
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
interface PerformanceSnapshot {
|
||||||
|
active_skus: number; // Total products in catalog
|
||||||
|
total_revenue_30d: number | null; // Estimated from qty × price
|
||||||
|
total_stores: number; // Active retail partners
|
||||||
|
new_stores_30d: number; // New distribution in window
|
||||||
|
market_share: number | null; // % of category SKUs
|
||||||
|
avg_wholesale_price: number | null;
|
||||||
|
price_position: 'premium' | 'value' | 'competitive';
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**UI Label Mapping:**
|
||||||
|
| Field | User-Facing Label | Helper Text |
|
||||||
|
|-------|-------------------|-------------|
|
||||||
|
| `active_skus` | Active Products | X total in catalog |
|
||||||
|
| `total_revenue_30d` | Monthly Revenue | Estimated from sales |
|
||||||
|
| `total_stores` | Retail Distribution | Active retail partners |
|
||||||
|
| `new_stores_30d` | New Opportunities | X new in last 30 days |
|
||||||
|
| `market_share` | Category Position | % of category |
|
||||||
|
| `avg_wholesale_price` | Avg Wholesale | Per unit |
|
||||||
|
| `price_position` | Pricing Tier | Premium/Value/Market Rate |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Section 2: Alerts
|
||||||
|
|
||||||
|
Issues requiring attention.
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
interface Alerts {
|
||||||
|
lost_stores_30d_count: number;
|
||||||
|
lost_skus_30d_count: number;
|
||||||
|
competitor_takeover_count: number;
|
||||||
|
avg_oos_duration_days: number | null;
|
||||||
|
avg_reorder_lag_days: number | null;
|
||||||
|
items: AlertItem[];
|
||||||
|
}
|
||||||
|
|
||||||
|
interface AlertItem {
|
||||||
|
type: 'lost_store' | 'delisted_sku' | 'shelf_loss' | 'extended_oos';
|
||||||
|
severity: 'critical' | 'warning';
|
||||||
|
store_name?: string;
|
||||||
|
product_name?: string;
|
||||||
|
competitor_brand?: string;
|
||||||
|
days_since?: number;
|
||||||
|
state_code?: string;
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**UI Label Mapping:**
|
||||||
|
| Field | User-Facing Label |
|
||||||
|
|-------|-------------------|
|
||||||
|
| `lost_stores_30d_count` | Accounts at Risk |
|
||||||
|
| `lost_skus_30d_count` | Delisted SKUs |
|
||||||
|
| `competitor_takeover_count` | Shelf Losses |
|
||||||
|
| `avg_oos_duration_days` | Avg Stockout Length |
|
||||||
|
| `avg_reorder_lag_days` | Avg Restock Time |
|
||||||
|
| `severity: critical` | Urgent |
|
||||||
|
| `severity: warning` | Watch |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Section 3: SKU Performance (Product Velocity)
|
||||||
|
|
||||||
|
How fast each SKU sells.
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
interface SkuPerformance {
|
||||||
|
store_product_id: number;
|
||||||
|
product_name: string;
|
||||||
|
category: string | null;
|
||||||
|
daily_velocity: number; // Units/day estimate
|
||||||
|
velocity_status: 'hot' | 'steady' | 'slow' | 'stale';
|
||||||
|
retail_price: number | null;
|
||||||
|
on_sale: boolean;
|
||||||
|
stores_carrying: number;
|
||||||
|
stock_status: 'in_stock' | 'low_stock' | 'out_of_stock';
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**UI Label Mapping:**
|
||||||
|
| Field | User-Facing Label |
|
||||||
|
|-------|-------------------|
|
||||||
|
| `daily_velocity` | Daily Rate |
|
||||||
|
| `velocity_status` | Momentum |
|
||||||
|
| `velocity_status: hot` | Hot |
|
||||||
|
| `velocity_status: steady` | Steady |
|
||||||
|
| `velocity_status: slow` | Slow |
|
||||||
|
| `velocity_status: stale` | Stale |
|
||||||
|
| `retail_price` | Retail Price |
|
||||||
|
| `on_sale` | Promo (badge) |
|
||||||
|
|
||||||
|
**Velocity Thresholds:**
|
||||||
|
- `hot`: >= 5 units/day
|
||||||
|
- `steady`: >= 1 unit/day
|
||||||
|
- `slow`: >= 0.1 units/day
|
||||||
|
- `stale`: < 0.1 units/day
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Section 4: Retail Footprint
|
||||||
|
|
||||||
|
Store placement and coverage.
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
interface RetailFootprint {
|
||||||
|
total_stores: number;
|
||||||
|
in_stock_count: number;
|
||||||
|
out_of_stock_count: number;
|
||||||
|
penetration_by_region: RegionPenetration[];
|
||||||
|
whitespace_stores: WhitespaceStore[];
|
||||||
|
}
|
||||||
|
|
||||||
|
interface RegionPenetration {
|
||||||
|
state_code: string;
|
||||||
|
store_count: number;
|
||||||
|
percent_reached: number; // % of state's dispensaries
|
||||||
|
in_stock: number;
|
||||||
|
out_of_stock: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
interface WhitespaceStore {
|
||||||
|
store_id: number;
|
||||||
|
store_name: string;
|
||||||
|
state_code: string;
|
||||||
|
city: string | null;
|
||||||
|
category_fit: number; // How many competing brands they carry
|
||||||
|
competitor_brands: string[];
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**UI Label Mapping:**
|
||||||
|
| Field | User-Facing Label |
|
||||||
|
|-------|-------------------|
|
||||||
|
| `penetration_by_region` | Market Coverage by Region |
|
||||||
|
| `percent_reached` | X% reached |
|
||||||
|
| `in_stock` | X stocked |
|
||||||
|
| `out_of_stock` | X out |
|
||||||
|
| `whitespace_stores` | Expansion Opportunities |
|
||||||
|
| `category_fit` | X fit |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Section 5: Competitive Landscape
|
||||||
|
|
||||||
|
Market positioning vs competitors.
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
interface CompetitiveLandscape {
|
||||||
|
brand_price_position: 'premium' | 'value' | 'competitive';
|
||||||
|
market_share_trend: MarketSharePoint[];
|
||||||
|
competitors: Competitor[];
|
||||||
|
head_to_head_skus: HeadToHead[];
|
||||||
|
}
|
||||||
|
|
||||||
|
interface MarketSharePoint {
|
||||||
|
date: string;
|
||||||
|
share_percent: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
interface Competitor {
|
||||||
|
brand_name: string;
|
||||||
|
store_overlap_percent: number;
|
||||||
|
price_position: 'premium' | 'value' | 'competitive';
|
||||||
|
avg_price: number | null;
|
||||||
|
sku_count: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
interface HeadToHead {
|
||||||
|
product_name: string;
|
||||||
|
brand_price: number;
|
||||||
|
competitor_brand: string;
|
||||||
|
competitor_price: number;
|
||||||
|
price_diff_percent: number;
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**UI Label Mapping:**
|
||||||
|
| Field | User-Facing Label |
|
||||||
|
|-------|-------------------|
|
||||||
|
| `price_position: premium` | Premium Tier |
|
||||||
|
| `price_position: value` | Value Leader |
|
||||||
|
| `price_position: competitive` | Market Rate |
|
||||||
|
| `market_share_trend` | Share of Shelf Trend |
|
||||||
|
| `head_to_head_skus` | Price Comparison |
|
||||||
|
| `store_overlap_percent` | X% store overlap |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Section 6: Inventory Health
|
||||||
|
|
||||||
|
Stock projections and risk levels.
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
interface InventoryHealth {
|
||||||
|
critical_count: number; // <7 days stock
|
||||||
|
warning_count: number; // 7-14 days stock
|
||||||
|
healthy_count: number; // 14-90 days stock
|
||||||
|
overstocked_count: number; // >90 days stock
|
||||||
|
skus: InventorySku[];
|
||||||
|
overstock_alert: OverstockItem[];
|
||||||
|
}
|
||||||
|
|
||||||
|
interface InventorySku {
|
||||||
|
store_product_id: number;
|
||||||
|
product_name: string;
|
||||||
|
store_name: string;
|
||||||
|
days_of_stock: number | null;
|
||||||
|
risk_level: 'critical' | 'elevated' | 'moderate' | 'healthy';
|
||||||
|
current_quantity: number | null;
|
||||||
|
daily_sell_rate: number | null;
|
||||||
|
}
|
||||||
|
|
||||||
|
interface OverstockItem {
|
||||||
|
product_name: string;
|
||||||
|
store_name: string;
|
||||||
|
excess_units: number;
|
||||||
|
days_of_stock: number;
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**UI Label Mapping:**
|
||||||
|
| Field | User-Facing Label |
|
||||||
|
|-------|-------------------|
|
||||||
|
| `risk_level: critical` | Reorder Now |
|
||||||
|
| `risk_level: elevated` | Low Stock |
|
||||||
|
| `risk_level: moderate` | Monitor |
|
||||||
|
| `risk_level: healthy` | Healthy |
|
||||||
|
| `critical_count` | Urgent (<7 days) |
|
||||||
|
| `warning_count` | Low (7-14 days) |
|
||||||
|
| `overstocked_count` | Excess (>90 days) |
|
||||||
|
| `days_of_stock` | X days remaining |
|
||||||
|
| `overstock_alert` | Overstock Alert |
|
||||||
|
| `excess_units` | X excess units |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Section 7: Promotion Effectiveness
|
||||||
|
|
||||||
|
How promotions impact sales.
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
interface PromoPerformance {
|
||||||
|
avg_baseline_velocity: number | null;
|
||||||
|
avg_promo_velocity: number | null;
|
||||||
|
avg_velocity_lift: number | null; // % increase during promo
|
||||||
|
avg_efficiency_score: number | null; // ROI proxy
|
||||||
|
promotions: Promotion[];
|
||||||
|
}
|
||||||
|
|
||||||
|
interface Promotion {
|
||||||
|
product_name: string;
|
||||||
|
store_name: string;
|
||||||
|
status: 'active' | 'scheduled' | 'ended';
|
||||||
|
start_date: string;
|
||||||
|
end_date: string | null;
|
||||||
|
regular_price: number;
|
||||||
|
promo_price: number;
|
||||||
|
discount_percent: number;
|
||||||
|
baseline_velocity: number | null;
|
||||||
|
promo_velocity: number | null;
|
||||||
|
velocity_lift: number | null;
|
||||||
|
efficiency_score: number | null;
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**UI Label Mapping:**
|
||||||
|
| Field | User-Facing Label |
|
||||||
|
|-------|-------------------|
|
||||||
|
| `avg_baseline_velocity` | Normal Rate |
|
||||||
|
| `avg_promo_velocity` | During Promos |
|
||||||
|
| `avg_velocity_lift` | Avg Sales Lift |
|
||||||
|
| `avg_efficiency_score` | ROI Score |
|
||||||
|
| `velocity_lift` | Sales Lift |
|
||||||
|
| `efficiency_score` | ROI Score |
|
||||||
|
| `status: active` | Live |
|
||||||
|
| `status: scheduled` | Scheduled |
|
||||||
|
| `status: ended` | Ended |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Example Queries
|
||||||
|
|
||||||
|
### Get full payload
|
||||||
|
```javascript
|
||||||
|
const response = await fetch('/api/analytics/v2/brand/Wyld/intelligence?window=30d');
|
||||||
|
const data = await response.json();
|
||||||
|
```
|
||||||
|
|
||||||
|
### Extract summary cards (flattened)
|
||||||
|
```javascript
|
||||||
|
const { performance_snapshot: ps, alerts } = data;
|
||||||
|
|
||||||
|
const summaryCards = {
|
||||||
|
activeProducts: ps.active_skus,
|
||||||
|
monthlyRevenue: ps.total_revenue_30d,
|
||||||
|
retailDistribution: ps.total_stores,
|
||||||
|
newOpportunities: ps.new_stores_30d,
|
||||||
|
categoryPosition: ps.market_share,
|
||||||
|
avgWholesale: ps.avg_wholesale_price,
|
||||||
|
pricingTier: ps.price_position,
|
||||||
|
accountsAtRisk: alerts.lost_stores_30d_count,
|
||||||
|
delistedSkus: alerts.lost_skus_30d_count,
|
||||||
|
shelfLosses: alerts.competitor_takeover_count,
|
||||||
|
};
|
||||||
|
```
|
||||||
|
|
||||||
|
### Get top 10 fastest selling SKUs
|
||||||
|
```javascript
|
||||||
|
const topSkus = data.sku_performance
|
||||||
|
.filter(sku => sku.velocity_status === 'hot' || sku.velocity_status === 'steady')
|
||||||
|
.sort((a, b) => b.daily_velocity - a.daily_velocity)
|
||||||
|
.slice(0, 10);
|
||||||
|
```
|
||||||
|
|
||||||
|
### Get critical inventory alerts only
|
||||||
|
```javascript
|
||||||
|
const criticalInventory = data.inventory_health.skus
|
||||||
|
.filter(sku => sku.risk_level === 'critical');
|
||||||
|
```
|
||||||
|
|
||||||
|
### Get states with <50% penetration
|
||||||
|
```javascript
|
||||||
|
const underPenetrated = data.retail_footprint.penetration_by_region
|
||||||
|
.filter(region => region.percent_reached < 50)
|
||||||
|
.sort((a, b) => a.percent_reached - b.percent_reached);
|
||||||
|
```
|
||||||
|
|
||||||
|
### Get active promotions with positive lift
|
||||||
|
```javascript
|
||||||
|
const effectivePromos = data.promo_performance.promotions
|
||||||
|
.filter(p => p.status === 'active' && p.velocity_lift > 0)
|
||||||
|
.sort((a, b) => b.velocity_lift - a.velocity_lift);
|
||||||
|
```
|
||||||
|
|
||||||
|
### Build chart data for market share trend
|
||||||
|
```javascript
|
||||||
|
const chartData = data.competitive_landscape.market_share_trend.map(point => ({
|
||||||
|
x: new Date(point.date),
|
||||||
|
y: point.share_percent,
|
||||||
|
}));
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Notes for Frontend Implementation
|
||||||
|
|
||||||
|
1. **All fields are snake_case** - transform to camelCase if needed
|
||||||
|
2. **Null values are possible** - handle gracefully in UI
|
||||||
|
3. **Arrays may be empty** - show appropriate empty states
|
||||||
|
4. **Timestamps are ISO format** - parse with `new Date()`
|
||||||
|
5. **Percentages are already computed** - no need to multiply by 100
|
||||||
|
6. **The `window` parameter affects trend calculations** - 7d/30d/90d
|
||||||
539
backend/docs/_archive/CRAWL_PIPELINE.md
Normal file
539
backend/docs/_archive/CRAWL_PIPELINE.md
Normal file
@@ -0,0 +1,539 @@
|
|||||||
|
# Crawl Pipeline Documentation
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
The crawl pipeline fetches product data from Dutchie dispensary menus and stores it in the canonical database. This document covers the complete flow from task scheduling to data storage.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Pipeline Stages
|
||||||
|
|
||||||
|
```
|
||||||
|
┌─────────────────────┐
|
||||||
|
│ store_discovery │ Find new dispensaries
|
||||||
|
└─────────┬───────────┘
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
┌─────────────────────┐
|
||||||
|
│ entry_point_discovery│ Resolve slug → platform_dispensary_id
|
||||||
|
└─────────┬───────────┘
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
┌─────────────────────┐
|
||||||
|
│ product_discovery │ Initial product crawl
|
||||||
|
└─────────┬───────────┘
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
┌─────────────────────┐
|
||||||
|
│ product_resync │ Recurring crawl (every 4 hours)
|
||||||
|
└─────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Stage Details
|
||||||
|
|
||||||
|
### 1. Store Discovery
|
||||||
|
**Purpose:** Find new dispensaries to crawl
|
||||||
|
|
||||||
|
**Handler:** `src/tasks/handlers/store-discovery.ts`
|
||||||
|
|
||||||
|
**Flow:**
|
||||||
|
1. Query Dutchie `ConsumerDispensaries` GraphQL for cities/states
|
||||||
|
2. Extract dispensary info (name, address, menu_url)
|
||||||
|
3. Insert into `dutchie_discovery_locations`
|
||||||
|
4. Queue `entry_point_discovery` for each new location
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 2. Entry Point Discovery
|
||||||
|
**Purpose:** Resolve menu URL slug to platform_dispensary_id (MongoDB ObjectId)
|
||||||
|
|
||||||
|
**Handler:** `src/tasks/handlers/entry-point-discovery.ts`
|
||||||
|
|
||||||
|
**Flow:**
|
||||||
|
1. Load dispensary from database
|
||||||
|
2. Extract slug from `menu_url`:
|
||||||
|
- `/embedded-menu/<slug>` or `/dispensary/<slug>`
|
||||||
|
3. Start stealth session (fingerprint + proxy)
|
||||||
|
4. Query `resolveDispensaryIdWithDetails(slug)` via GraphQL
|
||||||
|
5. Update dispensary with `platform_dispensary_id`
|
||||||
|
6. Queue `product_discovery` task
|
||||||
|
|
||||||
|
**Example:**
|
||||||
|
```
|
||||||
|
menu_url: https://dutchie.com/embedded-menu/deeply-rooted
|
||||||
|
slug: deeply-rooted
|
||||||
|
platform_dispensary_id: 6405ef617056e8014d79101b
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 3. Product Discovery
|
||||||
|
**Purpose:** Initial crawl of a new dispensary
|
||||||
|
|
||||||
|
**Handler:** `src/tasks/handlers/product-discovery.ts`
|
||||||
|
|
||||||
|
Follows the same flow as `product_resync` (described below), but runs once per new dispensary before it enters the recurring resync schedule.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 4. Product Resync
|
||||||
|
**Purpose:** Recurring crawl to capture price/stock changes
|
||||||
|
|
||||||
|
**Handler:** `src/tasks/handlers/product-resync.ts`
|
||||||
|
|
||||||
|
**Flow:**
|
||||||
|
|
||||||
|
#### Step 1: Load Dispensary Info
|
||||||
|
```sql
|
||||||
|
SELECT id, name, platform_dispensary_id, menu_url, state
|
||||||
|
FROM dispensaries
|
||||||
|
WHERE id = $1 AND crawl_enabled = true
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Step 2: Start Stealth Session
|
||||||
|
- Generate random browser fingerprint
|
||||||
|
- Set locale/timezone matching state
|
||||||
|
- Optional proxy rotation
|
||||||
|
|
||||||
|
#### Step 3: Fetch Products via GraphQL
|
||||||
|
**Endpoint:** `https://dutchie.com/api-3/graphql`
|
||||||
|
|
||||||
|
**Variables:**
|
||||||
|
```javascript
|
||||||
|
{
|
||||||
|
includeEnterpriseSpecials: false,
|
||||||
|
productsFilter: {
|
||||||
|
dispensaryId: "<platform_dispensary_id>",
|
||||||
|
pricingType: "rec",
|
||||||
|
Status: "All",
|
||||||
|
types: [],
|
||||||
|
useCache: false,
|
||||||
|
isDefaultSort: true,
|
||||||
|
sortBy: "popularSortIdx",
|
||||||
|
sortDirection: 1,
|
||||||
|
bypassOnlineThresholds: true,
|
||||||
|
isKioskMenu: false,
|
||||||
|
removeProductsBelowOptionThresholds: false
|
||||||
|
},
|
||||||
|
page: 0,
|
||||||
|
perPage: 100
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Key Notes:**
|
||||||
|
- `Status: "All"` returns all products (`Status: "Active"` returns the same count)
|
||||||
|
- `Status: null` returns 0 products (broken)
|
||||||
|
- `pricingType: "rec"` returns BOTH rec and med prices
|
||||||
|
- Paginate until `products.length < perPage` or `allProducts.length >= totalCount`
|
||||||
|
|
||||||
|
#### Step 4: Normalize Data
|
||||||
|
Transform raw Dutchie payload to canonical format via `DutchieNormalizer`.
|
||||||
|
|
||||||
|
#### Step 5: Upsert Products
|
||||||
|
Insert/update `store_products` table with normalized data.
|
||||||
|
|
||||||
|
#### Step 6: Create Snapshots
|
||||||
|
Insert point-in-time record to `store_product_snapshots`.
|
||||||
|
|
||||||
|
#### Step 7: Track Missing Products (OOS Detection)
|
||||||
|
```sql
|
||||||
|
-- Reset consecutive_misses for products IN the feed
|
||||||
|
UPDATE store_products
|
||||||
|
SET consecutive_misses = 0, last_seen_at = NOW()
|
||||||
|
WHERE dispensary_id = $1
|
||||||
|
AND provider = 'dutchie'
|
||||||
|
AND provider_product_id = ANY($2)
|
||||||
|
|
||||||
|
-- Increment for products NOT in feed
|
||||||
|
UPDATE store_products
|
||||||
|
SET consecutive_misses = consecutive_misses + 1
|
||||||
|
WHERE dispensary_id = $1
|
||||||
|
AND provider = 'dutchie'
|
||||||
|
AND provider_product_id NOT IN (...)
|
||||||
|
AND consecutive_misses < 3
|
||||||
|
|
||||||
|
-- Mark OOS at 3 consecutive misses
|
||||||
|
UPDATE store_products
|
||||||
|
SET stock_status = 'oos', is_in_stock = false
|
||||||
|
WHERE dispensary_id = $1
|
||||||
|
AND consecutive_misses >= 3
|
||||||
|
AND stock_status != 'oos'
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Step 8: Download Images
|
||||||
|
For new products, download and store images locally.
|
||||||
|
|
||||||
|
#### Step 9: Update Dispensary
|
||||||
|
```sql
|
||||||
|
UPDATE dispensaries SET last_crawl_at = NOW() WHERE id = $1
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## GraphQL Payload Structure
|
||||||
|
|
||||||
|
### Product Fields (from filteredProducts.products[])
|
||||||
|
|
||||||
|
| Field | Type | Description |
|
||||||
|
|-------|------|-------------|
|
||||||
|
| `_id` / `id` | string | MongoDB ObjectId (24 hex chars) |
|
||||||
|
| `Name` | string | Product display name |
|
||||||
|
| `brandName` | string | Brand name |
|
||||||
|
| `brand.name` | string | Brand name (nested) |
|
||||||
|
| `brand.description` | string | Brand description |
|
||||||
|
| `type` | string | Category (Flower, Edible, Concentrate, etc.) |
|
||||||
|
| `subcategory` | string | Subcategory |
|
||||||
|
| `strainType` | string | Hybrid, Indica, Sativa, N/A |
|
||||||
|
| `Status` | string | Always "Active" in feed |
|
||||||
|
| `Image` | string | Primary image URL |
|
||||||
|
| `images[]` | array | All product images |
|
||||||
|
|
||||||
|
### Pricing Fields
|
||||||
|
|
||||||
|
| Field | Type | Description |
|
||||||
|
|-------|------|-------------|
|
||||||
|
| `Prices[]` | number[] | Rec prices per option |
|
||||||
|
| `recPrices[]` | number[] | Rec prices |
|
||||||
|
| `medicalPrices[]` | number[] | Medical prices |
|
||||||
|
| `recSpecialPrices[]` | number[] | Rec sale prices |
|
||||||
|
| `medicalSpecialPrices[]` | number[] | Medical sale prices |
|
||||||
|
| `Options[]` | string[] | Size options ("1/8oz", "1g", etc.) |
|
||||||
|
| `rawOptions[]` | string[] | Raw weight options ("3.5g") |
|
||||||
|
|
||||||
|
### Inventory Fields (POSMetaData.children[])
|
||||||
|
|
||||||
|
| Field | Type | Description |
|
||||||
|
|-------|------|-------------|
|
||||||
|
| `quantity` | number | Total inventory count |
|
||||||
|
| `quantityAvailable` | number | Available for online orders |
|
||||||
|
| `kioskQuantityAvailable` | number | Available for kiosk orders |
|
||||||
|
| `option` | string | Which size option this is for |
|
||||||
|
|
||||||
|
### Potency Fields
|
||||||
|
|
||||||
|
| Field | Type | Description |
|
||||||
|
|-------|------|-------------|
|
||||||
|
| `THCContent.range[]` | number[] | THC percentage |
|
||||||
|
| `CBDContent.range[]` | number[] | CBD percentage |
|
||||||
|
| `cannabinoidsV2[]` | array | Detailed cannabinoid breakdown |
|
||||||
|
|
||||||
|
### Specials (specialData.bogoSpecials[])
|
||||||
|
|
||||||
|
| Field | Type | Description |
|
||||||
|
|-------|------|-------------|
|
||||||
|
| `specialName` | string | Deal name |
|
||||||
|
| `specialType` | string | "bogo", "sale", etc. |
|
||||||
|
| `itemsForAPrice.value` | string | Bundle price |
|
||||||
|
| `bogoRewards[].totalQuantity.quantity` | number | Required quantity |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## OOS Detection Logic
|
||||||
|
|
||||||
|
Products disappear from the Dutchie feed when they go out of stock. We track this via `consecutive_misses`:
|
||||||
|
|
||||||
|
| Scenario | Action |
|
||||||
|
|----------|--------|
|
||||||
|
| Product in feed | `consecutive_misses = 0` |
|
||||||
|
| Product missing 1st time | `consecutive_misses = 1` |
|
||||||
|
| Product missing 2nd time | `consecutive_misses = 2` |
|
||||||
|
| Product missing 3rd time | `consecutive_misses = 3`, mark `stock_status = 'oos'` |
|
||||||
|
| Product returns to feed | `consecutive_misses = 0`, update stock_status |
|
||||||
|
|
||||||
|
**Why 3 misses?**
|
||||||
|
- Protects against false positives from crawl failures
|
||||||
|
- Single bad crawl doesn't trigger mass OOS alerts
|
||||||
|
- Balances detection speed vs accuracy
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Database Tables
|
||||||
|
|
||||||
|
### store_products
|
||||||
|
Current state of each product:
|
||||||
|
- `provider_product_id` - Dutchie's MongoDB ObjectId
|
||||||
|
- `name_raw`, `brand_name_raw` - Raw values from feed
|
||||||
|
- `price_rec`, `price_med` - Current prices
|
||||||
|
- `is_in_stock`, `stock_status` - Availability
|
||||||
|
- `consecutive_misses` - OOS detection counter
|
||||||
|
- `last_seen_at` - Last time product was in feed
|
||||||
|
|
||||||
|
### store_product_snapshots
|
||||||
|
Point-in-time records for historical analysis:
|
||||||
|
- One row per product per crawl
|
||||||
|
- Captures price, stock, potency at that moment
|
||||||
|
- Used for price history, analytics
|
||||||
|
|
||||||
|
### dispensaries
|
||||||
|
Store metadata:
|
||||||
|
- `platform_dispensary_id` - MongoDB ObjectId for GraphQL
|
||||||
|
- `menu_url` - Source URL
|
||||||
|
- `last_crawl_at` - Last successful crawl
|
||||||
|
- `crawl_enabled` - Whether to crawl
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Worker Roles
|
||||||
|
|
||||||
|
Workers pull tasks from the `worker_tasks` queue based on their assigned role.
|
||||||
|
|
||||||
|
| Role | Name | Description | Handler |
|
||||||
|
|------|------|-------------|---------|
|
||||||
|
| `product_resync` | Product Resync | Re-crawl dispensary products for price/stock changes | `handleProductResync` |
|
||||||
|
| `product_discovery` | Product Discovery | Initial product discovery for new dispensaries | `handleProductDiscovery` |
|
||||||
|
| `store_discovery` | Store Discovery | Discover new dispensary locations | `handleStoreDiscovery` |
|
||||||
|
| `entry_point_discovery` | Entry Point Discovery | Resolve platform IDs from menu URLs | `handleEntryPointDiscovery` |
|
||||||
|
| `analytics_refresh` | Analytics Refresh | Refresh materialized views and analytics | `handleAnalyticsRefresh` |
|
||||||
|
|
||||||
|
**API Endpoint:** `GET /api/worker-registry/roles`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Scheduling
|
||||||
|
|
||||||
|
Crawls are scheduled via `worker_tasks` table:
|
||||||
|
|
||||||
|
| Role | Frequency | Description |
|
||||||
|
|------|-----------|-------------|
|
||||||
|
| `product_resync` | Every 4 hours | Regular product refresh |
|
||||||
|
| `product_discovery` | On-demand | First crawl for new stores |
|
||||||
|
| `entry_point_discovery` | On-demand | New store setup |
|
||||||
|
| `store_discovery` | Daily | Find new stores |
|
||||||
|
| `analytics_refresh` | Daily | Refresh analytics materialized views |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Priority & On-Demand Tasks
|
||||||
|
|
||||||
|
Tasks are claimed by workers in order of **priority DESC, created_at ASC**.
|
||||||
|
|
||||||
|
### Priority Levels
|
||||||
|
|
||||||
|
| Priority | Use Case | Example |
|
||||||
|
|----------|----------|---------|
|
||||||
|
| 0 | Scheduled/batch tasks | Daily product_resync generation |
|
||||||
|
| 10 | On-demand/chained tasks | entry_point_discovery → product_discovery |
|
||||||
|
| Higher | Urgent/manual triggers | Admin-triggered immediate crawl |
|
||||||
|
|
||||||
|
### Task Chaining
|
||||||
|
|
||||||
|
When a task completes, the system automatically creates follow-up tasks:
|
||||||
|
|
||||||
|
```
|
||||||
|
store_discovery (completed)
|
||||||
|
└─► entry_point_discovery (priority: 10) for each new store
|
||||||
|
|
||||||
|
entry_point_discovery (completed, success)
|
||||||
|
└─► product_discovery (priority: 10) for that store
|
||||||
|
|
||||||
|
product_discovery (completed)
|
||||||
|
└─► [no chain] Store enters regular resync schedule
|
||||||
|
```
|
||||||
|
|
||||||
|
### On-Demand Task Creation
|
||||||
|
|
||||||
|
Use the task service to create high-priority tasks:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// Create immediate product resync for a store
|
||||||
|
await taskService.createTask({
|
||||||
|
role: 'product_resync',
|
||||||
|
dispensary_id: 123,
|
||||||
|
platform: 'dutchie',
|
||||||
|
priority: 20, // Higher than batch tasks
|
||||||
|
});
|
||||||
|
|
||||||
|
// Convenience methods with default high priority (10)
|
||||||
|
await taskService.createEntryPointTask(dispensaryId, 'dutchie');
|
||||||
|
await taskService.createProductDiscoveryTask(dispensaryId, 'dutchie');
|
||||||
|
await taskService.createStoreDiscoveryTask('dutchie', 'AZ');
|
||||||
|
```
|
||||||
|
|
||||||
|
### Claim Function
|
||||||
|
|
||||||
|
The `claim_task()` SQL function atomically claims tasks:
|
||||||
|
- Respects priority ordering (higher = first)
|
||||||
|
- Uses `FOR UPDATE SKIP LOCKED` for concurrency
|
||||||
|
- Prevents multiple active tasks per store
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Image Storage
|
||||||
|
|
||||||
|
Images are downloaded from Dutchie's AWS S3 and stored locally with on-demand resizing.
|
||||||
|
|
||||||
|
### Storage Path
|
||||||
|
```
|
||||||
|
/storage/images/products/<state>/<store>/<brand>/<product_id>/image-<hash>.webp
|
||||||
|
/storage/images/brands/<brand>/logo-<hash>.webp
|
||||||
|
```
|
||||||
|
|
||||||
|
**Example:**
|
||||||
|
```
|
||||||
|
/storage/images/products/az/az-deeply-rooted/bud-bros/6913e3cd444eac3935e928b9/image-ae38b1f9.webp
|
||||||
|
```
|
||||||
|
|
||||||
|
### Image Proxy API
|
||||||
|
Served via `/img/*` with on-demand resizing using **sharp**:
|
||||||
|
|
||||||
|
```
|
||||||
|
GET /img/products/az/az-deeply-rooted/bud-bros/6913e3cd444eac3935e928b9/image-ae38b1f9.webp?w=200
|
||||||
|
```
|
||||||
|
|
||||||
|
| Param | Description |
|
||||||
|
|-------|-------------|
|
||||||
|
| `w` | Width in pixels (max 4000) |
|
||||||
|
| `h` | Height in pixels (max 4000) |
|
||||||
|
| `q` | Quality 1-100 (default 80) |
|
||||||
|
| `fit` | cover, contain, fill, inside, outside |
|
||||||
|
| `blur` | Blur sigma (0.3-1000) |
|
||||||
|
| `gray` | Grayscale (1 = enabled) |
|
||||||
|
| `format` | webp, jpeg, png, avif (default webp) |
|
||||||
|
|
||||||
|
### Key Files
|
||||||
|
| File | Purpose |
|
||||||
|
|------|---------|
|
||||||
|
| `src/utils/image-storage.ts` | Download & save images to local filesystem |
|
||||||
|
| `src/routes/image-proxy.ts` | On-demand resize/transform at `/img/*` |
|
||||||
|
|
||||||
|
### Download Rules
|
||||||
|
|
||||||
|
| Scenario | Image Action |
|
||||||
|
|----------|--------------|
|
||||||
|
| **New product (first crawl)** | Download if `primaryImageUrl` exists |
|
||||||
|
| **Existing product (refresh)** | Download only if `local_image_path` is NULL (backfill) |
|
||||||
|
| **Product already has local image** | Skip download entirely |
|
||||||
|
|
||||||
|
**Logic:**
|
||||||
|
- Images are downloaded **once** and never re-downloaded on subsequent crawls
|
||||||
|
- `skipIfExists: true` - filesystem check prevents re-download even if queued
|
||||||
|
- First crawl: all products get images
|
||||||
|
- Refresh crawl: only new products or products missing local images
|
||||||
|
|
||||||
|
### Storage Rules
|
||||||
|
- **NO MinIO** - local filesystem only (`STORAGE_DRIVER=local`)
|
||||||
|
- Store full resolution, resize on-demand via `/img` proxy
|
||||||
|
- Convert to webp for consistency using **sharp**
|
||||||
|
- Preserve original Dutchie URL as fallback in `image_url` column
|
||||||
|
- Local path stored in `local_image_path` column
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Stealth & Anti-Detection
|
||||||
|
|
||||||
|
**PROXIES ARE REQUIRED** - Workers will fail to start if no active proxies are available in the database. All HTTP requests to Dutchie go through a proxy.
|
||||||
|
|
||||||
|
Workers automatically initialize anti-detection systems on startup.
|
||||||
|
|
||||||
|
### Components
|
||||||
|
|
||||||
|
| Component | Purpose | Source |
|
||||||
|
|-----------|---------|--------|
|
||||||
|
| **CrawlRotator** | Coordinates proxy + UA rotation | `src/services/crawl-rotator.ts` |
|
||||||
|
| **ProxyRotator** | Round-robin proxy selection, health tracking | `src/services/crawl-rotator.ts` |
|
||||||
|
| **UserAgentRotator** | Cycles through realistic browser fingerprints | `src/services/crawl-rotator.ts` |
|
||||||
|
| **Dutchie Client** | Curl-based HTTP with auto-retry on 403 | `src/platforms/dutchie/client.ts` |
|
||||||
|
|
||||||
|
### Initialization Flow
|
||||||
|
|
||||||
|
```
|
||||||
|
Worker Start
|
||||||
|
│
|
||||||
|
├─► initializeStealth()
|
||||||
|
│ │
|
||||||
|
│ ├─► CrawlRotator.initialize()
|
||||||
|
│ │ └─► Load proxies from `proxies` table
|
||||||
|
│ │
|
||||||
|
│ └─► setCrawlRotator(rotator)
|
||||||
|
│ └─► Wire to Dutchie client
|
||||||
|
│
|
||||||
|
└─► Process tasks...
|
||||||
|
```
|
||||||
|
|
||||||
|
### Stealth Session (per task)
|
||||||
|
|
||||||
|
Each crawl task starts a stealth session:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// In product-resync.ts, entry-point-discovery.ts
|
||||||
|
const session = startSession(dispensary.state || 'AZ', 'America/Phoenix');
|
||||||
|
```
|
||||||
|
|
||||||
|
This creates a new identity with:
|
||||||
|
- **Random fingerprint:** Chrome/Firefox/Safari/Edge on Win/Mac/Linux
|
||||||
|
- **Accept-Language:** Matches timezone (e.g., `America/Phoenix` → `en-US,en;q=0.9`)
|
||||||
|
- **sec-ch-ua headers:** Proper Client Hints for the browser profile
|
||||||
|
|
||||||
|
### On 403 Block
|
||||||
|
|
||||||
|
When Dutchie returns 403, the client automatically:
|
||||||
|
|
||||||
|
1. Records failure on current proxy (increments `failure_count`)
|
||||||
|
2. If proxy has 5+ failures, deactivates it
|
||||||
|
3. Rotates to next healthy proxy
|
||||||
|
4. Rotates fingerprint
|
||||||
|
5. Retries the request
|
||||||
|
|
||||||
|
### Proxy Table Schema
|
||||||
|
|
||||||
|
```sql
|
||||||
|
CREATE TABLE proxies (
|
||||||
|
id SERIAL PRIMARY KEY,
|
||||||
|
host VARCHAR(255) NOT NULL,
|
||||||
|
port INTEGER NOT NULL,
|
||||||
|
username VARCHAR(100),
|
||||||
|
password VARCHAR(100),
|
||||||
|
protocol VARCHAR(10) DEFAULT 'http', -- http, https, socks5
|
||||||
|
is_active BOOLEAN DEFAULT true,
|
||||||
|
last_used_at TIMESTAMPTZ,
|
||||||
|
failure_count INTEGER DEFAULT 0,
|
||||||
|
success_count INTEGER DEFAULT 0,
|
||||||
|
avg_response_time_ms INTEGER,
|
||||||
|
last_failure_at TIMESTAMPTZ,
|
||||||
|
last_error TEXT
|
||||||
|
);
|
||||||
|
```
|
||||||
|
|
||||||
|
### Configuration
|
||||||
|
|
||||||
|
Proxies are mandatory. There is no environment variable to disable them. Workers will refuse to start without active proxies in the database.
|
||||||
|
|
||||||
|
### User-Agent Generation
|
||||||
|
|
||||||
|
See `workflow-12102025.md` for full specification.
|
||||||
|
|
||||||
|
**Summary:**
|
||||||
|
- Uses `intoli/user-agents` library (daily-updated market share data)
|
||||||
|
- Device distribution: Mobile 62%, Desktop 36%, Tablet 2%
|
||||||
|
- Browser whitelist: Chrome, Safari, Edge, Firefox only
|
||||||
|
- UA sticks until IP rotates (403 or manual rotation)
|
||||||
|
- Failure = alert admin + stop crawl (no fallback)
|
||||||
|
|
||||||
|
Each fingerprint includes proper `sec-ch-ua`, `sec-ch-ua-platform`, and `sec-ch-ua-mobile` headers.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Error Handling
|
||||||
|
|
||||||
|
- **GraphQL errors:** Logged, task marked failed, retried later
|
||||||
|
- **Normalization errors:** Logged as warnings, continue with valid products
|
||||||
|
- **Image download errors:** Non-fatal, logged, continue
|
||||||
|
- **Database errors:** Task fails, will be retried
|
||||||
|
- **403 blocks:** Auto-rotate proxy + fingerprint, retry (up to 3 retries)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Files
|
||||||
|
|
||||||
|
| File | Purpose |
|
||||||
|
|------|---------|
|
||||||
|
| `src/tasks/handlers/product-resync.ts` | Main crawl handler |
|
||||||
|
| `src/tasks/handlers/entry-point-discovery.ts` | Slug → ID resolution |
|
||||||
|
| `src/platforms/dutchie/index.ts` | GraphQL client, session management |
|
||||||
|
| `src/hydration/normalizers/dutchie.ts` | Payload normalization |
|
||||||
|
| `src/hydration/canonical-upsert.ts` | Database upsert logic |
|
||||||
|
| `src/utils/image-storage.ts` | Image download and local storage |
|
||||||
|
| `src/routes/image-proxy.ts` | On-demand image resizing |
|
||||||
|
| `migrations/075_consecutive_misses.sql` | OOS tracking column |
|
||||||
297
backend/docs/_archive/ORGANIC_SCRAPING_GUIDE.md
Normal file
297
backend/docs/_archive/ORGANIC_SCRAPING_GUIDE.md
Normal file
@@ -0,0 +1,297 @@
|
|||||||
|
# Organic Browser-Based Scraping Guide
|
||||||
|
|
||||||
|
**Last Updated:** 2025-12-12
|
||||||
|
**Status:** Production-ready proof of concept
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
This document describes the "organic" browser-based approach to scraping Dutchie dispensary menus. Unlike direct curl/axios requests, this method uses a real browser session to make API calls, making requests appear natural and reducing detection risk.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Why Organic Scraping?
|
||||||
|
|
||||||
|
| Approach | Detection Risk | Speed | Complexity |
|
||||||
|
|----------|---------------|-------|------------|
|
||||||
|
| Direct curl | Higher | Fast | Low |
|
||||||
|
| curl-impersonate | Medium | Fast | Medium |
|
||||||
|
| **Browser-based (organic)** | **Lowest** | Slower | Higher |
|
||||||
|
|
||||||
|
Direct curl requests can be fingerprinted via:
|
||||||
|
- TLS fingerprint (cipher suites, extensions)
|
||||||
|
- Header order and values
|
||||||
|
- Missing cookies/session data
|
||||||
|
- Request patterns
|
||||||
|
|
||||||
|
Browser-based requests inherit:
|
||||||
|
- Real Chrome TLS fingerprint
|
||||||
|
- Session cookies from page visit
|
||||||
|
- Natural header order
|
||||||
|
- JavaScript execution environment
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Implementation
|
||||||
|
|
||||||
|
### Dependencies
|
||||||
|
|
||||||
|
```bash
|
||||||
|
npm install puppeteer puppeteer-extra puppeteer-extra-plugin-stealth
|
||||||
|
```
|
||||||
|
|
||||||
|
### Core Script: `test-intercept.js`
|
||||||
|
|
||||||
|
Located at: `backend/test-intercept.js`
|
||||||
|
|
||||||
|
```javascript
|
||||||
|
const puppeteer = require('puppeteer-extra');
|
||||||
|
const StealthPlugin = require('puppeteer-extra-plugin-stealth');
|
||||||
|
const fs = require('fs');
|
||||||
|
|
||||||
|
puppeteer.use(StealthPlugin());
|
||||||
|
|
||||||
|
async function capturePayload(config) {
|
||||||
|
const { dispensaryId, platformId, cName, outputPath } = config;
|
||||||
|
|
||||||
|
const browser = await puppeteer.launch({
|
||||||
|
headless: 'new',
|
||||||
|
args: ['--no-sandbox', '--disable-setuid-sandbox']
|
||||||
|
});
|
||||||
|
|
||||||
|
const page = await browser.newPage();
|
||||||
|
|
||||||
|
// STEP 1: Establish session by visiting the menu
|
||||||
|
const embedUrl = `https://dutchie.com/embedded-menu/${cName}?menuType=rec`;
|
||||||
|
await page.goto(embedUrl, { waitUntil: 'networkidle2', timeout: 60000 });
|
||||||
|
|
||||||
|
// STEP 2: Fetch ALL products using GraphQL from browser context
|
||||||
|
const result = await page.evaluate(async (platformId) => {
|
||||||
|
const allProducts = [];
|
||||||
|
let pageNum = 0;
|
||||||
|
const perPage = 100;
|
||||||
|
let totalCount = 0;
|
||||||
|
const sessionId = 'browser-session-' + Date.now();
|
||||||
|
|
||||||
|
while (pageNum < 30) {
|
||||||
|
const variables = {
|
||||||
|
includeEnterpriseSpecials: false,
|
||||||
|
productsFilter: {
|
||||||
|
dispensaryId: platformId,
|
||||||
|
pricingType: 'rec',
|
||||||
|
Status: 'Active', // CRITICAL: Must be 'Active', not null
|
||||||
|
types: [],
|
||||||
|
useCache: true,
|
||||||
|
isDefaultSort: true,
|
||||||
|
sortBy: 'popularSortIdx',
|
||||||
|
sortDirection: 1,
|
||||||
|
bypassOnlineThresholds: true,
|
||||||
|
isKioskMenu: false,
|
||||||
|
removeProductsBelowOptionThresholds: false,
|
||||||
|
},
|
||||||
|
page: pageNum,
|
||||||
|
perPage: perPage,
|
||||||
|
};
|
||||||
|
|
||||||
|
const extensions = {
|
||||||
|
persistedQuery: {
|
||||||
|
version: 1,
|
||||||
|
sha256Hash: 'ee29c060826dc41c527e470e9ae502c9b2c169720faa0a9f5d25e1b9a530a4a0'
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
const qs = new URLSearchParams({
|
||||||
|
operationName: 'FilteredProducts',
|
||||||
|
variables: JSON.stringify(variables),
|
||||||
|
extensions: JSON.stringify(extensions)
|
||||||
|
});
|
||||||
|
|
||||||
|
const response = await fetch(`https://dutchie.com/api-3/graphql?${qs}`, {
|
||||||
|
method: 'GET',
|
||||||
|
headers: {
|
||||||
|
'Accept': 'application/json',
|
||||||
|
'content-type': 'application/json',
|
||||||
|
'x-dutchie-session': sessionId,
|
||||||
|
'apollographql-client-name': 'Marketplace (production)',
|
||||||
|
},
|
||||||
|
credentials: 'include'
|
||||||
|
});
|
||||||
|
|
||||||
|
const json = await response.json();
|
||||||
|
const data = json?.data?.filteredProducts;
|
||||||
|
if (!data?.products) break;
|
||||||
|
|
||||||
|
allProducts.push(...data.products);
|
||||||
|
if (pageNum === 0) totalCount = data.queryInfo?.totalCount || 0;
|
||||||
|
if (allProducts.length >= totalCount) break;
|
||||||
|
|
||||||
|
pageNum++;
|
||||||
|
await new Promise(r => setTimeout(r, 200)); // Polite delay
|
||||||
|
}
|
||||||
|
|
||||||
|
return { products: allProducts, totalCount };
|
||||||
|
}, platformId);
|
||||||
|
|
||||||
|
await browser.close();
|
||||||
|
|
||||||
|
// STEP 3: Save payload
|
||||||
|
const payload = {
|
||||||
|
dispensaryId,
|
||||||
|
platformId,
|
||||||
|
cName,
|
||||||
|
fetchedAt: new Date().toISOString(),
|
||||||
|
productCount: result.products.length,
|
||||||
|
products: result.products,
|
||||||
|
};
|
||||||
|
|
||||||
|
fs.writeFileSync(outputPath, JSON.stringify(payload, null, 2));
|
||||||
|
return payload;
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Critical Parameters
|
||||||
|
|
||||||
|
### GraphQL Hash (FilteredProducts)
|
||||||
|
|
||||||
|
```
|
||||||
|
ee29c060826dc41c527e470e9ae502c9b2c169720faa0a9f5d25e1b9a530a4a0
|
||||||
|
```
|
||||||
|
|
||||||
|
**WARNING:** Using the wrong hash returns HTTP 400.
|
||||||
|
|
||||||
|
### Status Parameter
|
||||||
|
|
||||||
|
| Value | Result |
|
||||||
|
|-------|--------|
|
||||||
|
| `'Active'` | Returns in-stock products (1019 in test) |
|
||||||
|
| `null` | Returns 0 products |
|
||||||
|
| `'All'` | Returns HTTP 400 |
|
||||||
|
|
||||||
|
**ALWAYS use `Status: 'Active'`**
|
||||||
|
|
||||||
|
### Required Headers
|
||||||
|
|
||||||
|
```javascript
|
||||||
|
{
|
||||||
|
'Accept': 'application/json',
|
||||||
|
'content-type': 'application/json',
|
||||||
|
'x-dutchie-session': 'unique-session-id',
|
||||||
|
'apollographql-client-name': 'Marketplace (production)',
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Endpoint
|
||||||
|
|
||||||
|
```
|
||||||
|
https://dutchie.com/api-3/graphql
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Performance Benchmarks
|
||||||
|
|
||||||
|
Test store: AZ-Deeply-Rooted (1019 products)
|
||||||
|
|
||||||
|
| Metric | Value |
|
||||||
|
|--------|-------|
|
||||||
|
| Total products | 1019 |
|
||||||
|
| Time | 18.5 seconds |
|
||||||
|
| Payload size | 11.8 MB |
|
||||||
|
| Pages fetched | 11 (100 per page) |
|
||||||
|
| Success rate | 100% |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Payload Format
|
||||||
|
|
||||||
|
The output matches the existing `payload-fetch.ts` handler format:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"dispensaryId": 123,
|
||||||
|
"platformId": "6405ef617056e8014d79101b",
|
||||||
|
"cName": "AZ-Deeply-Rooted",
|
||||||
|
"fetchedAt": "2025-12-12T05:05:19.837Z",
|
||||||
|
"productCount": 1019,
|
||||||
|
"products": [
|
||||||
|
{
|
||||||
|
"id": "6927508db4851262f629a869",
|
||||||
|
"Name": "Product Name",
|
||||||
|
"brand": { "name": "Brand Name", ... },
|
||||||
|
"type": "Flower",
|
||||||
|
"THC": "25%",
|
||||||
|
"Prices": [...],
|
||||||
|
"Options": [...],
|
||||||
|
...
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Integration Points
|
||||||
|
|
||||||
|
### As a Task Handler
|
||||||
|
|
||||||
|
The organic approach can be integrated as an alternative to curl-based fetching:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// In src/tasks/handlers/organic-payload-fetch.ts
|
||||||
|
export async function handleOrganicPayloadFetch(ctx: TaskContext): Promise<TaskResult> {
|
||||||
|
// Use puppeteer-based capture
|
||||||
|
// Save to same payload storage
|
||||||
|
// Queue product_refresh task
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Worker Configuration
|
||||||
|
|
||||||
|
Add to job_schedules:
|
||||||
|
```sql
|
||||||
|
INSERT INTO job_schedules (name, role, cron_expression)
|
||||||
|
VALUES ('organic_product_crawl', 'organic_payload_fetch', '0 */6 * * *');
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
### HTTP 400 Bad Request
|
||||||
|
- Check hash is correct: `ee29c060...`
|
||||||
|
- Verify Status is `'Active'` (string, not null)
|
||||||
|
|
||||||
|
### 0 Products Returned
|
||||||
|
- Status was likely `null` (returns 0 products) - use `'Active'` (note: `'All'` causes HTTP 400, not an empty result)
|
||||||
|
- Check platformId is valid MongoDB ObjectId
|
||||||
|
|
||||||
|
### Session Not Established
|
||||||
|
- Increase timeout on initial page.goto()
|
||||||
|
- Check cName is valid (matches embedded-menu URL)
|
||||||
|
|
||||||
|
### Detection/Blocking
|
||||||
|
- StealthPlugin should handle most cases
|
||||||
|
- Add random delays between pages
|
||||||
|
- Use headless: 'new' (not true/false)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Files Reference
|
||||||
|
|
||||||
|
| File | Purpose |
|
||||||
|
|------|---------|
|
||||||
|
| `backend/test-intercept.js` | Proof of concept script |
|
||||||
|
| `backend/src/platforms/dutchie/client.ts` | GraphQL hashes, curl implementation |
|
||||||
|
| `backend/src/tasks/handlers/payload-fetch.ts` | Current curl-based handler |
|
||||||
|
| `backend/src/utils/payload-storage.ts` | Payload save/load utilities |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## See Also
|
||||||
|
|
||||||
|
- `DUTCHIE_CRAWL_WORKFLOW.md` - Full crawl pipeline documentation
|
||||||
|
- `TASK_WORKFLOW_2024-12-10.md` - Task system architecture
|
||||||
|
- `CLAUDE.md` - Project rules and constraints
|
||||||
25
backend/docs/_archive/README.md
Normal file
25
backend/docs/_archive/README.md
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
# ARCHIVED DOCUMENTATION
|
||||||
|
|
||||||
|
**WARNING: These docs may be outdated or inaccurate.**
|
||||||
|
|
||||||
|
The code has evolved significantly. These docs are kept for historical reference only.
|
||||||
|
|
||||||
|
## What to Use Instead
|
||||||
|
|
||||||
|
**The single source of truth is:**
|
||||||
|
- `CLAUDE.md` (root) - Essential rules and quick reference
|
||||||
|
- `docs/CODEBASE_MAP.md` - Current file/directory reference
|
||||||
|
|
||||||
|
## Why Archive?
|
||||||
|
|
||||||
|
These docs were written during development iterations and may reference:
|
||||||
|
- Old file paths that no longer exist
|
||||||
|
- Deprecated approaches (hydration, scraper-v2)
|
||||||
|
- APIs that have changed
|
||||||
|
- Database schemas that evolved
|
||||||
|
|
||||||
|
## If You Need Details
|
||||||
|
|
||||||
|
1. First check CODEBASE_MAP.md for current file locations
|
||||||
|
2. Then read the actual source code
|
||||||
|
3. Only use archive docs as a last resort for historical context
|
||||||
584
backend/docs/_archive/TASK_WORKFLOW_2024-12-10.md
Normal file
584
backend/docs/_archive/TASK_WORKFLOW_2024-12-10.md
Normal file
@@ -0,0 +1,584 @@
|
|||||||
|
# Task Workflow Documentation
|
||||||
|
**Date: 2024-12-10**
|
||||||
|
|
||||||
|
This document describes the complete task/job processing architecture after the 2024-12-10 rewrite.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Complete Architecture
|
||||||
|
|
||||||
|
```
|
||||||
|
┌─────────────────────────────────────────────────────────────────────────────────┐
|
||||||
|
│ KUBERNETES CLUSTER │
|
||||||
|
├─────────────────────────────────────────────────────────────────────────────────┤
|
||||||
|
│ │
|
||||||
|
│ ┌─────────────────────────────────────────────────────────────────────────┐ │
|
||||||
|
│ │ API SERVER POD (scraper) │ │
|
||||||
|
│ │ │ │
|
||||||
|
│ │ ┌──────────────────┐ ┌────────────────────────────────────────┐ │ │
|
||||||
|
│ │ │ Express API │ │ TaskScheduler │ │ │
|
||||||
|
│ │ │ │ │ (src/services/task-scheduler.ts) │ │ │
|
||||||
|
│ │ │ /api/job-queue │ │ │ │ │
|
||||||
|
│ │ │ /api/tasks │ │ • Polls every 60s │ │ │
|
||||||
|
│ │ │ /api/schedules │ │ • Checks task_schedules table │ │ │
|
||||||
|
│ │ └────────┬─────────┘ │ • SELECT FOR UPDATE SKIP LOCKED │ │ │
|
||||||
|
│ │ │ │ • Generates tasks when due │ │ │
|
||||||
|
│ │ │ └──────────────────┬─────────────────────┘ │ │
|
||||||
|
│ │ │ │ │ │
|
||||||
|
│ └────────────┼──────────────────────────────────┼──────────────────────────┘ │
|
||||||
|
│ │ │ │
|
||||||
|
│ │ ┌────────────────────────┘ │
|
||||||
|
│ │ │ │
|
||||||
|
│ ▼ ▼ │
|
||||||
|
│ ┌─────────────────────────────────────────────────────────────────────────┐ │
|
||||||
|
│ │ POSTGRESQL DATABASE │ │
|
||||||
|
│ │ │ │
|
||||||
|
│ │ ┌─────────────────────┐ ┌─────────────────────┐ │ │
|
||||||
|
│ │ │ task_schedules │ │ worker_tasks │ │ │
|
||||||
|
│ │ │ │ │ │ │ │
|
||||||
|
│ │ │ • product_refresh │───────►│ • pending tasks │ │ │
|
||||||
|
│ │ │ • store_discovery │ create │ • claimed tasks │ │ │
|
||||||
|
│ │ │ • analytics_refresh │ tasks │ • running tasks │ │ │
|
||||||
|
│ │ │ │ │ • completed tasks │ │ │
|
||||||
|
│ │ │ next_run_at │ │ │ │ │
|
||||||
|
│ │ │ last_run_at │ │ role, dispensary_id │ │ │
|
||||||
|
│ │ │ interval_hours │ │ priority, status │ │ │
|
||||||
|
│ │ └─────────────────────┘ └──────────┬──────────┘ │ │
|
||||||
|
│ │ │ │ │
|
||||||
|
│ └─────────────────────────────────────────────┼────────────────────────────┘ │
|
||||||
|
│ │ │
|
||||||
|
│ ┌──────────────────────┘ │
|
||||||
|
│ │ Workers poll for tasks │
|
||||||
|
│ │ (SELECT FOR UPDATE SKIP LOCKED) │
|
||||||
|
│ ▼ │
|
||||||
|
│ ┌─────────────────────────────────────────────────────────────────────────┐ │
|
||||||
|
│ │ WORKER PODS (StatefulSet: scraper-worker) │ │
|
||||||
|
│ │ │ │
|
||||||
|
│ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ │
|
||||||
|
│ │ │ Worker 0 │ │ Worker 1 │ │ Worker 2 │ │ Worker N │ │ │
|
||||||
|
│ │ │ │ │ │ │ │ │ │ │ │
|
||||||
|
│ │ │ task-worker │ │ task-worker │ │ task-worker │ │ task-worker │ │ │
|
||||||
|
│ │ │ .ts │ │ .ts │ │ .ts │ │ .ts │ │ │
|
||||||
|
│ │ └─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘ │ │
|
||||||
|
│ │ │ │
|
||||||
|
│ └──────────────────────────────────────────────────────────────────────────┘ │
|
||||||
|
│ │
|
||||||
|
└──────────────────────────────────────────────────────────────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Startup Sequence
|
||||||
|
|
||||||
|
```
|
||||||
|
┌─────────────────────────────────────────────────────────────────────────────┐
|
||||||
|
│ API SERVER STARTUP │
|
||||||
|
├─────────────────────────────────────────────────────────────────────────────┤
|
||||||
|
│ │
|
||||||
|
│ 1. Express app initializes │
|
||||||
|
│ │ │
|
||||||
|
│ ▼ │
|
||||||
|
│ 2. runAutoMigrations() │
|
||||||
|
│ • Runs pending migrations (including 079_task_schedules.sql) │
|
||||||
|
│ │ │
|
||||||
|
│ ▼ │
|
||||||
|
│ 3. initializeMinio() / initializeImageStorage() │
|
||||||
|
│ │ │
|
||||||
|
│ ▼ │
|
||||||
|
│ 4. cleanupOrphanedJobs() │
|
||||||
|
│ │ │
|
||||||
|
│ ▼ │
|
||||||
|
│ 5. taskScheduler.start() ◄─── NEW (per TASK_WORKFLOW_2024-12-10.md) │
|
||||||
|
│ │ │
|
||||||
|
│ ├── Recover stale tasks (workers that died) │
|
||||||
|
│ ├── Ensure default schedules exist in task_schedules │
|
||||||
|
│ ├── Check and run any due schedules immediately │
|
||||||
|
│ └── Start 60-second poll interval │
|
||||||
|
│ │ │
|
||||||
|
│ ▼ │
|
||||||
|
│ 6. app.listen(PORT) │
|
||||||
|
│ │
|
||||||
|
└─────────────────────────────────────────────────────────────────────────────┘
|
||||||
|
|
||||||
|
┌─────────────────────────────────────────────────────────────────────────────┐
|
||||||
|
│ WORKER POD STARTUP │
|
||||||
|
├─────────────────────────────────────────────────────────────────────────────┤
|
||||||
|
│ │
|
||||||
|
│ 1. K8s starts pod from StatefulSet │
|
||||||
|
│ │ │
|
||||||
|
│ ▼ │
|
||||||
|
│ 2. TaskWorker.constructor() │
|
||||||
|
│ • Create DB pool │
|
||||||
|
│ • Create CrawlRotator │
|
||||||
|
│ │ │
|
||||||
|
│ ▼ │
|
||||||
|
│ 3. initializeStealth() │
|
||||||
|
│ • Load proxies from DB (REQUIRED - fails if none) │
|
||||||
|
│ • Wire rotator to Dutchie client │
|
||||||
|
│ │ │
|
||||||
|
│ ▼ │
|
||||||
|
│ 4. register() with API │
|
||||||
|
│ • Optional - continues if fails │
|
||||||
|
│ │ │
|
||||||
|
│ ▼ │
|
||||||
|
│ 5. startRegistryHeartbeat() every 30s │
|
||||||
|
│ │ │
|
||||||
|
│ ▼ │
|
||||||
|
│ 6. processNextTask() loop │
|
||||||
|
│ │ │
|
||||||
|
│ ├── Poll for pending task (FOR UPDATE SKIP LOCKED) │
|
||||||
|
│ ├── Claim task atomically │
|
||||||
|
│ ├── Execute handler (product_refresh, store_discovery, etc.) │
|
||||||
|
│ ├── Mark complete/failed │
|
||||||
|
│ ├── Chain next task if applicable │
|
||||||
|
│ └── Loop │
|
||||||
|
│ │
|
||||||
|
└─────────────────────────────────────────────────────────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Schedule Flow
|
||||||
|
|
||||||
|
```
|
||||||
|
┌─────────────────────────────────────────────────────────────────────────────┐
|
||||||
|
│ SCHEDULER POLL (every 60 seconds) │
|
||||||
|
├─────────────────────────────────────────────────────────────────────────────┤
|
||||||
|
│ │
|
||||||
|
│ BEGIN TRANSACTION │
|
||||||
|
│ │ │
|
||||||
|
│ ▼ │
|
||||||
|
│ SELECT * FROM task_schedules │
|
||||||
|
│ WHERE enabled = true AND next_run_at <= NOW() │
|
||||||
|
│ FOR UPDATE SKIP LOCKED ◄─── Prevents duplicate execution across replicas │
|
||||||
|
│ │ │
|
||||||
|
│ ▼ │
|
||||||
|
│ For each due schedule: │
|
||||||
|
│ │ │
|
||||||
|
│ ├── product_refresh_all │
|
||||||
|
│ │ └─► Query dispensaries needing crawl │
|
||||||
|
│ │ └─► Create product_refresh tasks in worker_tasks │
|
||||||
|
│ │ │
|
||||||
|
│ ├── store_discovery_dutchie │
|
||||||
|
│ │ └─► Create single store_discovery task │
|
||||||
|
│ │ │
|
||||||
|
│ └── analytics_refresh │
|
||||||
|
│ └─► Create single analytics_refresh task │
|
||||||
|
│ │ │
|
||||||
|
│ ▼ │
|
||||||
|
│ UPDATE task_schedules SET │
|
||||||
|
│ last_run_at = NOW(), │
|
||||||
|
│ next_run_at = NOW() + interval_hours │
|
||||||
|
│ │ │
|
||||||
|
│ ▼ │
|
||||||
|
│ COMMIT │
|
||||||
|
│ │
|
||||||
|
└─────────────────────────────────────────────────────────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Task Lifecycle
|
||||||
|
|
||||||
|
```
|
||||||
|
┌──────────┐
|
||||||
|
│ SCHEDULE │
|
||||||
|
│ DUE │
|
||||||
|
└────┬─────┘
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
┌──────────────┐ claim ┌──────────────┐ start ┌──────────────┐
|
||||||
|
│ PENDING │────────────►│ CLAIMED │────────────►│ RUNNING │
|
||||||
|
└──────────────┘ └──────────────┘ └──────┬───────┘
|
||||||
|
▲ │
|
||||||
|
│ ┌──────────────┼──────────────┐
|
||||||
|
│ retry │ │ │
|
||||||
|
│ (if retries < max) ▼ ▼ ▼
|
||||||
|
│ ┌──────────┐ ┌──────────┐ ┌──────────┐
|
||||||
|
└──────────────────────────────────│ FAILED │ │ COMPLETED│ │ STALE │
|
||||||
|
└──────────┘ └──────────┘ └────┬─────┘
|
||||||
|
│
|
||||||
|
recover_stale_tasks()
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
┌──────────┐
|
||||||
|
│ PENDING │
|
||||||
|
└──────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Database Tables
|
||||||
|
|
||||||
|
### task_schedules (NEW - migration 079)
|
||||||
|
|
||||||
|
Stores schedule definitions. Survives restarts.
|
||||||
|
|
||||||
|
```sql
|
||||||
|
CREATE TABLE task_schedules (
|
||||||
|
id SERIAL PRIMARY KEY,
|
||||||
|
name VARCHAR(100) NOT NULL UNIQUE,
|
||||||
|
role VARCHAR(50) NOT NULL, -- product_refresh, store_discovery, etc.
|
||||||
|
enabled BOOLEAN DEFAULT TRUE,
|
||||||
|
interval_hours INTEGER NOT NULL, -- How often to run
|
||||||
|
priority INTEGER DEFAULT 0, -- Task priority when created
|
||||||
|
state_code VARCHAR(2), -- Optional filter
|
||||||
|
last_run_at TIMESTAMPTZ, -- When it last ran
|
||||||
|
next_run_at TIMESTAMPTZ, -- When it's due next
|
||||||
|
last_task_count INTEGER, -- Tasks created last run
|
||||||
|
last_error TEXT -- Error message if failed
|
||||||
|
);
|
||||||
|
```
|
||||||
|
|
||||||
|
### worker_tasks (migration 074)
|
||||||
|
|
||||||
|
The task queue. Workers pull from here.
|
||||||
|
|
||||||
|
```sql
|
||||||
|
CREATE TABLE worker_tasks (
|
||||||
|
id SERIAL PRIMARY KEY,
|
||||||
|
role task_role NOT NULL, -- What type of work
|
||||||
|
dispensary_id INTEGER, -- Which store (if applicable)
|
||||||
|
platform VARCHAR(50), -- Which platform
|
||||||
|
status task_status DEFAULT 'pending',
|
||||||
|
priority INTEGER DEFAULT 0, -- Higher = process first
|
||||||
|
scheduled_for TIMESTAMP, -- Don't process before this time
|
||||||
|
worker_id VARCHAR(100), -- Which worker claimed it
|
||||||
|
claimed_at TIMESTAMP,
|
||||||
|
started_at TIMESTAMP,
|
||||||
|
completed_at TIMESTAMP,
|
||||||
|
last_heartbeat_at TIMESTAMP, -- For stale detection
|
||||||
|
result JSONB,
|
||||||
|
error_message TEXT,
|
||||||
|
retry_count INTEGER DEFAULT 0,
|
||||||
|
max_retries INTEGER DEFAULT 3
|
||||||
|
);
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Default Schedules
|
||||||
|
|
||||||
|
| Name | Role | Interval | Priority | Description |
|
||||||
|
|------|------|----------|----------|-------------|
|
||||||
|
| `payload_fetch_all` | payload_fetch | 4 hours | 0 | Fetch payloads from Dutchie API (chains to product_refresh) |
|
||||||
|
| `store_discovery_dutchie` | store_discovery | 24 hours | 5 | Find new Dutchie stores |
|
||||||
|
| `analytics_refresh` | analytics_refresh | 6 hours | 0 | Refresh MVs |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Task Roles
|
||||||
|
|
||||||
|
| Role | Description | Creates Tasks For |
|
||||||
|
|------|-------------|-------------------|
|
||||||
|
| `payload_fetch` | **NEW** - Fetch from Dutchie API, save to disk | Each dispensary needing crawl |
|
||||||
|
| `product_refresh` | **CHANGED** - Read local payload, normalize, upsert to DB | Chained from payload_fetch |
|
||||||
|
| `store_discovery` | Find new dispensaries, returns newStoreIds[] | Single task per platform |
|
||||||
|
| `entry_point_discovery` | **DEPRECATED** - Resolve platform IDs | No longer used |
|
||||||
|
| `product_discovery` | Initial product fetch for new stores | Chained from store_discovery |
|
||||||
|
| `analytics_refresh` | Refresh MVs | Single global task |
|
||||||
|
|
||||||
|
### Payload/Refresh Separation (2024-12-10)
|
||||||
|
|
||||||
|
The crawl workflow is now split into two phases:
|
||||||
|
|
||||||
|
```
|
||||||
|
payload_fetch (scheduled every 4h)
|
||||||
|
└─► Hit Dutchie GraphQL API
|
||||||
|
└─► Save raw JSON to /storage/payloads/{year}/{month}/{day}/store_{id}_{ts}.json.gz
|
||||||
|
└─► Record metadata in raw_crawl_payloads table
|
||||||
|
└─► Queue product_refresh task with payload_id
|
||||||
|
|
||||||
|
product_refresh (chained from payload_fetch)
|
||||||
|
└─► Load payload from filesystem (NOT from API)
|
||||||
|
└─► Normalize via DutchieNormalizer
|
||||||
|
└─► Upsert to store_products
|
||||||
|
└─► Create snapshots
|
||||||
|
└─► Track missing products
|
||||||
|
└─► Download images
|
||||||
|
```
|
||||||
|
|
||||||
|
**Benefits:**
|
||||||
|
- **Retry-friendly**: If normalize fails, re-run product_refresh without re-crawling
|
||||||
|
- **Replay-able**: Run product_refresh against any historical payload
|
||||||
|
- **Faster refreshes**: Local file read vs network call
|
||||||
|
- **Historical diffs**: Compare payloads to see what changed between crawls
|
||||||
|
- **Less API pressure**: Only payload_fetch hits Dutchie
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Task Chaining
|
||||||
|
|
||||||
|
Tasks automatically queue follow-up tasks upon successful completion. This creates two main flows:
|
||||||
|
|
||||||
|
### Discovery Flow (New Stores)
|
||||||
|
|
||||||
|
When `store_discovery` finds new dispensaries, they automatically get their initial product data:
|
||||||
|
|
||||||
|
```
|
||||||
|
store_discovery
|
||||||
|
└─► Discovers new locations via Dutchie GraphQL
|
||||||
|
└─► Auto-promotes valid locations to dispensaries table
|
||||||
|
└─► Collects newDispensaryIds[] from promotions
|
||||||
|
└─► Returns { newStoreIds: [...] } in result
|
||||||
|
|
||||||
|
chainNextTask() detects newStoreIds
|
||||||
|
└─► Creates product_discovery task for each new store
|
||||||
|
|
||||||
|
product_discovery
|
||||||
|
└─► Calls handlePayloadFetch() internally
|
||||||
|
└─► payload_fetch hits Dutchie API
|
||||||
|
└─► Saves raw JSON to /storage/payloads/
|
||||||
|
└─► Queues product_refresh task with payload_id
|
||||||
|
|
||||||
|
product_refresh
|
||||||
|
└─► Loads payload from filesystem
|
||||||
|
└─► Normalizes and upserts to store_products
|
||||||
|
└─► Creates snapshots, downloads images
|
||||||
|
```
|
||||||
|
|
||||||
|
**Complete Discovery Chain:**
|
||||||
|
```
|
||||||
|
store_discovery → product_discovery → payload_fetch → product_refresh
|
||||||
|
(internal call) (queues next)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Scheduled Flow (Existing Stores)
|
||||||
|
|
||||||
|
For existing stores, `payload_fetch_all` schedule runs every 4 hours:
|
||||||
|
|
||||||
|
```
|
||||||
|
TaskScheduler (every 60s)
|
||||||
|
└─► Checks task_schedules for due schedules
|
||||||
|
└─► payload_fetch_all is due
|
||||||
|
└─► Generates payload_fetch task for each dispensary
|
||||||
|
|
||||||
|
payload_fetch
|
||||||
|
└─► Hits Dutchie GraphQL API
|
||||||
|
└─► Saves raw JSON to /storage/payloads/
|
||||||
|
└─► Queues product_refresh task with payload_id
|
||||||
|
|
||||||
|
product_refresh
|
||||||
|
└─► Loads payload from filesystem (NOT API)
|
||||||
|
└─► Normalizes via DutchieNormalizer
|
||||||
|
└─► Upserts to store_products
|
||||||
|
└─► Creates snapshots
|
||||||
|
```
|
||||||
|
|
||||||
|
**Complete Scheduled Chain:**
|
||||||
|
```
|
||||||
|
payload_fetch → product_refresh
|
||||||
|
(queues) (reads local)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Chaining Implementation
|
||||||
|
|
||||||
|
Task chaining is handled in three places:
|
||||||
|
|
||||||
|
1. **Internal chaining (handler calls handler):**
|
||||||
|
- `product_discovery` calls `handlePayloadFetch()` directly
|
||||||
|
|
||||||
|
2. **External chaining (chainNextTask() in task-service.ts):**
|
||||||
|
- Called after task completion
|
||||||
|
- `store_discovery` → queues `product_discovery` for each newStoreId
|
||||||
|
|
||||||
|
3. **Queue-based chaining (taskService.createTask):**
|
||||||
|
- `payload_fetch` queues `product_refresh` with `payload: { payload_id }`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Payload API Endpoints
|
||||||
|
|
||||||
|
Raw crawl payloads can be accessed via the Payloads API:
|
||||||
|
|
||||||
|
| Endpoint | Method | Description |
|
||||||
|
|----------|--------|-------------|
|
||||||
|
| `GET /api/payloads` | GET | List payload metadata (paginated) |
|
||||||
|
| `GET /api/payloads/:id` | GET | Get payload metadata by ID |
|
||||||
|
| `GET /api/payloads/:id/data` | GET | Get full payload JSON (decompressed) |
|
||||||
|
| `GET /api/payloads/store/:dispensaryId` | GET | List payloads for a store |
|
||||||
|
| `GET /api/payloads/store/:dispensaryId/latest` | GET | Get latest payload for a store |
|
||||||
|
| `GET /api/payloads/store/:dispensaryId/diff` | GET | Diff two payloads for changes |
|
||||||
|
|
||||||
|
### Payload Diff Response
|
||||||
|
|
||||||
|
The diff endpoint returns:
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"success": true,
|
||||||
|
"from": { "id": 123, "fetchedAt": "...", "productCount": 100 },
|
||||||
|
"to": { "id": 456, "fetchedAt": "...", "productCount": 105 },
|
||||||
|
"diff": {
|
||||||
|
"added": 10,
|
||||||
|
"removed": 5,
|
||||||
|
"priceChanges": 8,
|
||||||
|
"stockChanges": 12
|
||||||
|
},
|
||||||
|
"details": {
|
||||||
|
"added": [...],
|
||||||
|
"removed": [...],
|
||||||
|
"priceChanges": [...],
|
||||||
|
"stockChanges": [...]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## API Endpoints
|
||||||
|
|
||||||
|
### Schedules (NEW)
|
||||||
|
|
||||||
|
| Endpoint | Method | Description |
|
||||||
|
|----------|--------|-------------|
|
||||||
|
| `GET /api/schedules` | GET | List all schedules |
|
||||||
|
| `PUT /api/schedules/:id` | PUT | Update schedule |
|
||||||
|
| `POST /api/schedules/:id/trigger` | POST | Run schedule immediately |
|
||||||
|
|
||||||
|
### Task Creation (rewired 2024-12-10)
|
||||||
|
|
||||||
|
| Endpoint | Method | Description |
|
||||||
|
|----------|--------|-------------|
|
||||||
|
| `POST /api/job-queue/enqueue` | POST | Create single task |
|
||||||
|
| `POST /api/job-queue/enqueue-batch` | POST | Create batch tasks |
|
||||||
|
| `POST /api/job-queue/enqueue-state` | POST | Create tasks for state |
|
||||||
|
| `POST /api/tasks` | POST | Direct task creation |
|
||||||
|
|
||||||
|
### Task Management
|
||||||
|
|
||||||
|
| Endpoint | Method | Description |
|
||||||
|
|----------|--------|-------------|
|
||||||
|
| `GET /api/tasks` | GET | List tasks |
|
||||||
|
| `GET /api/tasks/:id` | GET | Get single task |
|
||||||
|
| `GET /api/tasks/counts` | GET | Task counts by status |
|
||||||
|
| `POST /api/tasks/recover-stale` | POST | Recover stale tasks |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Key Files
|
||||||
|
|
||||||
|
| File | Purpose |
|
||||||
|
|------|---------|
|
||||||
|
| `src/services/task-scheduler.ts` | **NEW** - DB-driven scheduler |
|
||||||
|
| `src/tasks/task-worker.ts` | Worker that processes tasks |
|
||||||
|
| `src/tasks/task-service.ts` | Task CRUD operations |
|
||||||
|
| `src/tasks/handlers/payload-fetch.ts` | **NEW** - Fetches from API, saves to disk |
|
||||||
|
| `src/tasks/handlers/product-refresh.ts` | **CHANGED** - Reads from disk, processes to DB |
|
||||||
|
| `src/utils/payload-storage.ts` | **NEW** - Payload save/load utilities |
|
||||||
|
| `src/routes/tasks.ts` | Task API endpoints |
|
||||||
|
| `src/routes/job-queue.ts` | Job Queue UI endpoints (rewired) |
|
||||||
|
| `migrations/079_task_schedules.sql` | Schedule table |
|
||||||
|
| `migrations/080_raw_crawl_payloads.sql` | Payload metadata table |
|
||||||
|
| `migrations/081_payload_fetch_columns.sql` | payload, last_fetch_at columns |
|
||||||
|
| `migrations/074_worker_task_queue.sql` | Task queue table |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Legacy Code (DEPRECATED)
|
||||||
|
|
||||||
|
| File | Status | Replacement |
|
||||||
|
|------|--------|-------------|
|
||||||
|
| `src/services/scheduler.ts` | DEPRECATED | `task-scheduler.ts` |
|
||||||
|
| `dispensary_crawl_jobs` table | ORPHANED | `worker_tasks` |
|
||||||
|
| `job_schedules` table | LEGACY | `task_schedules` |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Dashboard Integration
|
||||||
|
|
||||||
|
Both pages remain wired to the dashboard:
|
||||||
|
|
||||||
|
| Page | Data Source | Actions |
|
||||||
|
|------|-------------|---------|
|
||||||
|
| **Job Queue** | `worker_tasks`, `task_schedules` | Create tasks, view schedules |
|
||||||
|
| **Task Queue** | `worker_tasks` | View tasks, recover stale |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Multi-Replica Safety
|
||||||
|
|
||||||
|
The scheduler uses `SELECT FOR UPDATE SKIP LOCKED` to ensure:
|
||||||
|
|
||||||
|
1. **Only one replica** executes a schedule at a time
|
||||||
|
2. **No duplicate tasks** created
|
||||||
|
3. **Survives pod restarts** - state in DB, not memory
|
||||||
|
4. **Self-healing** - recovers stale tasks on startup
|
||||||
|
|
||||||
|
```sql
|
||||||
|
-- This query is atomic across all API server replicas
|
||||||
|
SELECT * FROM task_schedules
|
||||||
|
WHERE enabled = true AND next_run_at <= NOW()
|
||||||
|
FOR UPDATE SKIP LOCKED
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Worker Scaling (K8s)
|
||||||
|
|
||||||
|
Workers run as a StatefulSet in Kubernetes. You can scale from the admin UI or CLI.
|
||||||
|
|
||||||
|
### From Admin UI
|
||||||
|
|
||||||
|
The Workers page (`/admin/workers`) provides:
|
||||||
|
- Current replica count display
|
||||||
|
- Scale up/down buttons
|
||||||
|
- Target replica input
|
||||||
|
|
||||||
|
### API Endpoints
|
||||||
|
|
||||||
|
| Endpoint | Method | Description |
|
||||||
|
|----------|--------|-------------|
|
||||||
|
| `GET /api/workers/k8s/replicas` | GET | Get current/desired replica counts |
|
||||||
|
| `POST /api/workers/k8s/scale` | POST | Scale to N replicas (body: `{ replicas: N }`) |
|
||||||
|
|
||||||
|
### From CLI
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# View current replicas
|
||||||
|
kubectl get statefulset scraper-worker -n dispensary-scraper
|
||||||
|
|
||||||
|
# Scale to 10 workers
|
||||||
|
kubectl scale statefulset scraper-worker -n dispensary-scraper --replicas=10
|
||||||
|
|
||||||
|
# Scale down to 3 workers
|
||||||
|
kubectl scale statefulset scraper-worker -n dispensary-scraper --replicas=3
|
||||||
|
```
|
||||||
|
|
||||||
|
### Configuration
|
||||||
|
|
||||||
|
Environment variables for the API server:
|
||||||
|
|
||||||
|
| Variable | Default | Description |
|
||||||
|
|----------|---------|-------------|
|
||||||
|
| `K8S_NAMESPACE` | `dispensary-scraper` | Kubernetes namespace |
|
||||||
|
| `K8S_WORKER_STATEFULSET` | `scraper-worker` | StatefulSet name |
|
||||||
|
|
||||||
|
### RBAC Requirements
|
||||||
|
|
||||||
|
The API server pod needs these K8s permissions:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
|
kind: Role
|
||||||
|
metadata:
|
||||||
|
name: worker-scaler
|
||||||
|
namespace: dispensary-scraper
|
||||||
|
rules:
|
||||||
|
- apiGroups: ["apps"]
|
||||||
|
resources: ["statefulsets"]
|
||||||
|
verbs: ["get", "patch"]
|
||||||
|
---
|
||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
|
kind: RoleBinding
|
||||||
|
metadata:
|
||||||
|
name: scraper-worker-scaler
|
||||||
|
namespace: dispensary-scraper
|
||||||
|
subjects:
|
||||||
|
- kind: ServiceAccount
|
||||||
|
name: default
|
||||||
|
namespace: dispensary-scraper
|
||||||
|
roleRef:
|
||||||
|
kind: Role
|
||||||
|
name: worker-scaler
|
||||||
|
apiGroup: rbac.authorization.k8s.io
|
||||||
|
```
|
||||||
639
backend/docs/_archive/WORKER_TASK_ARCHITECTURE.md
Normal file
639
backend/docs/_archive/WORKER_TASK_ARCHITECTURE.md
Normal file
@@ -0,0 +1,639 @@
|
|||||||
|
# Worker Task Architecture
|
||||||
|
|
||||||
|
This document describes the unified task-based worker system that replaces the legacy fragmented job systems.
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
The task worker architecture provides a single, unified system for managing all background work in CannaiQ:
|
||||||
|
|
||||||
|
- **Store discovery** - Find new dispensaries on platforms
|
||||||
|
- **Entry point discovery** - Resolve platform IDs from menu URLs
|
||||||
|
- **Product discovery** - Initial product fetch for new stores
|
||||||
|
- **Product resync** - Regular price/stock updates for existing stores
|
||||||
|
- **Analytics refresh** - Refresh materialized views and analytics
|
||||||
|
|
||||||
|
## Architecture
|
||||||
|
|
||||||
|
### Database Tables
|
||||||
|
|
||||||
|
**`worker_tasks`** - Central task queue
|
||||||
|
```sql
|
||||||
|
CREATE TABLE worker_tasks (
|
||||||
|
id SERIAL PRIMARY KEY,
|
||||||
|
role task_role NOT NULL, -- What type of work
|
||||||
|
dispensary_id INTEGER, -- Which store (if applicable)
|
||||||
|
platform VARCHAR(50), -- Which platform (dutchie, etc.)
|
||||||
|
status task_status DEFAULT 'pending',
|
||||||
|
priority INTEGER DEFAULT 0, -- Higher = process first
|
||||||
|
scheduled_for TIMESTAMP, -- Don't process before this time
|
||||||
|
worker_id VARCHAR(100), -- Which worker claimed it
|
||||||
|
claimed_at TIMESTAMP,
|
||||||
|
started_at TIMESTAMP,
|
||||||
|
completed_at TIMESTAMP,
|
||||||
|
last_heartbeat_at TIMESTAMP, -- For stale detection
|
||||||
|
result JSONB, -- Output from handler
|
||||||
|
error_message TEXT,
|
||||||
|
retry_count INTEGER DEFAULT 0,
|
||||||
|
max_retries INTEGER DEFAULT 3,
|
||||||
|
created_at TIMESTAMP DEFAULT NOW(),
|
||||||
|
updated_at TIMESTAMP DEFAULT NOW()
|
||||||
|
);
|
||||||
|
```
|
||||||
|
|
||||||
|
**Key indexes:**
|
||||||
|
- `idx_worker_tasks_pending_priority` - For efficient task claiming
|
||||||
|
- `idx_worker_tasks_active_dispensary` - Prevents concurrent tasks per store (partial unique index)
|
||||||
|
|
||||||
|
### Task Roles
|
||||||
|
|
||||||
|
| Role | Purpose | Per-Store | Scheduled |
|
||||||
|
|------|---------|-----------|-----------|
|
||||||
|
| `store_discovery` | Find new stores on a platform | No | Daily |
|
||||||
|
| `entry_point_discovery` | Resolve platform IDs | Yes | On-demand |
|
||||||
|
| `product_discovery` | Initial product fetch | Yes | After entry_point |
|
||||||
|
| `product_resync` | Price/stock updates | Yes | Every 4 hours |
|
||||||
|
| `analytics_refresh` | Refresh MVs | No | Daily |
|
||||||
|
|
||||||
|
### Task Lifecycle
|
||||||
|
|
||||||
|
```
|
||||||
|
pending → claimed → running → completed
|
||||||
|
↓
|
||||||
|
failed
|
||||||
|
```
|
||||||
|
|
||||||
|
1. **pending** - Task is waiting to be picked up
|
||||||
|
2. **claimed** - Worker has claimed it (atomic via SELECT FOR UPDATE SKIP LOCKED)
|
||||||
|
3. **running** - Worker is actively processing
|
||||||
|
4. **completed** - Task finished successfully
|
||||||
|
5. **failed** - Task encountered an error
|
||||||
|
6. **stale** - Task lost its worker (recovered automatically)
|
||||||
|
|
||||||
|
## Files
|
||||||
|
|
||||||
|
### Core Files
|
||||||
|
|
||||||
|
| File | Purpose |
|
||||||
|
|------|---------|
|
||||||
|
| `src/tasks/task-service.ts` | TaskService - CRUD, claiming, capacity metrics |
|
||||||
|
| `src/tasks/task-worker.ts` | TaskWorker - Main worker loop |
|
||||||
|
| `src/tasks/index.ts` | Module exports |
|
||||||
|
| `src/routes/tasks.ts` | API endpoints |
|
||||||
|
| `migrations/074_worker_task_queue.sql` | Database schema |
|
||||||
|
|
||||||
|
### Task Handlers
|
||||||
|
|
||||||
|
| File | Role |
|
||||||
|
|------|------|
|
||||||
|
| `src/tasks/handlers/store-discovery.ts` | `store_discovery` |
|
||||||
|
| `src/tasks/handlers/entry-point-discovery.ts` | `entry_point_discovery` |
|
||||||
|
| `src/tasks/handlers/product-discovery.ts` | `product_discovery` |
|
||||||
|
| `src/tasks/handlers/product-resync.ts` | `product_resync` |
|
||||||
|
| `src/tasks/handlers/analytics-refresh.ts` | `analytics_refresh` |
|
||||||
|
|
||||||
|
## Running Workers
|
||||||
|
|
||||||
|
### Environment Variables
|
||||||
|
|
||||||
|
| Variable | Default | Description |
|
||||||
|
|----------|---------|-------------|
|
||||||
|
| `WORKER_ROLE` | (required) | Which task role to process |
|
||||||
|
| `WORKER_ID` | auto-generated | Custom worker identifier |
|
||||||
|
| `POLL_INTERVAL_MS` | 5000 | How often to check for tasks |
|
||||||
|
| `HEARTBEAT_INTERVAL_MS` | 30000 | How often to update heartbeat |
|
||||||
|
|
||||||
|
### Starting a Worker
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Start a product resync worker
|
||||||
|
WORKER_ROLE=product_resync npx tsx src/tasks/task-worker.ts
|
||||||
|
|
||||||
|
# Start with custom ID
|
||||||
|
WORKER_ROLE=product_resync WORKER_ID=resync-1 npx tsx src/tasks/task-worker.ts
|
||||||
|
|
||||||
|
# Start multiple workers for different roles
|
||||||
|
WORKER_ROLE=store_discovery npx tsx src/tasks/task-worker.ts &
|
||||||
|
WORKER_ROLE=product_resync npx tsx src/tasks/task-worker.ts &
|
||||||
|
```
|
||||||
|
|
||||||
|
### Kubernetes Deployment
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: task-worker-resync
|
||||||
|
spec:
|
||||||
|
replicas: 3
|
||||||
|
template:
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: worker
|
||||||
|
image: code.cannabrands.app/creationshop/dispensary-scraper:latest
|
||||||
|
command: ["npx", "tsx", "src/tasks/task-worker.ts"]
|
||||||
|
env:
|
||||||
|
- name: WORKER_ROLE
|
||||||
|
value: "product_resync"
|
||||||
|
```
|
||||||
|
|
||||||
|
## API Endpoints
|
||||||
|
|
||||||
|
### Task Management
|
||||||
|
|
||||||
|
| Endpoint | Method | Description |
|
||||||
|
|----------|--------|-------------|
|
||||||
|
| `/api/tasks` | GET | List tasks with filters |
|
||||||
|
| `/api/tasks` | POST | Create a new task |
|
||||||
|
| `/api/tasks/:id` | GET | Get task by ID |
|
||||||
|
| `/api/tasks/counts` | GET | Get counts by status |
|
||||||
|
| `/api/tasks/capacity` | GET | Get capacity metrics |
|
||||||
|
| `/api/tasks/capacity/:role` | GET | Get role-specific capacity |
|
||||||
|
| `/api/tasks/recover-stale` | POST | Recover tasks from dead workers |
|
||||||
|
|
||||||
|
### Task Generation
|
||||||
|
|
||||||
|
| Endpoint | Method | Description |
|
||||||
|
|----------|--------|-------------|
|
||||||
|
| `/api/tasks/generate/resync` | POST | Generate daily resync tasks |
|
||||||
|
| `/api/tasks/generate/discovery` | POST | Create store discovery task |
|
||||||
|
|
||||||
|
### Migration (from legacy systems)
|
||||||
|
|
||||||
|
| Endpoint | Method | Description |
|
||||||
|
|----------|--------|-------------|
|
||||||
|
| `/api/tasks/migration/status` | GET | Compare old vs new systems |
|
||||||
|
| `/api/tasks/migration/disable-old-schedules` | POST | Disable job_schedules |
|
||||||
|
| `/api/tasks/migration/cancel-pending-crawl-jobs` | POST | Cancel old crawl jobs |
|
||||||
|
| `/api/tasks/migration/create-resync-tasks` | POST | Create tasks for all stores |
|
||||||
|
| `/api/tasks/migration/full-migrate` | POST | One-click migration |
|
||||||
|
|
||||||
|
### Role-Specific Endpoints
|
||||||
|
|
||||||
|
| Endpoint | Method | Description |
|
||||||
|
|----------|--------|-------------|
|
||||||
|
| `/api/tasks/role/:role/last-completion` | GET | Last completion time |
|
||||||
|
| `/api/tasks/role/:role/recent` | GET | Recent completions |
|
||||||
|
| `/api/tasks/store/:id/active` | GET | Check if store has active task |
|
||||||
|
|
||||||
|
## Capacity Planning
|
||||||
|
|
||||||
|
The `v_worker_capacity` view provides real-time metrics:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
SELECT * FROM v_worker_capacity;
|
||||||
|
```
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
- `pending_tasks` - Tasks waiting to be claimed
|
||||||
|
- `ready_tasks` - Tasks ready now (scheduled_for is null or past)
|
||||||
|
- `claimed_tasks` - Tasks claimed but not started
|
||||||
|
- `running_tasks` - Tasks actively processing
|
||||||
|
- `completed_last_hour` - Recent completions
|
||||||
|
- `failed_last_hour` - Recent failures
|
||||||
|
- `active_workers` - Workers with recent heartbeats
|
||||||
|
- `avg_duration_sec` - Average task duration
|
||||||
|
- `tasks_per_worker_hour` - Throughput estimate
|
||||||
|
- `estimated_hours_to_drain` - Time to clear queue
|
||||||
|
|
||||||
|
### Scaling Recommendations
|
||||||
|
|
||||||
|
```javascript
|
||||||
|
// API: GET /api/tasks/capacity/:role
|
||||||
|
{
|
||||||
|
"role": "product_resync",
|
||||||
|
"pending_tasks": 500,
|
||||||
|
"active_workers": 3,
|
||||||
|
"workers_needed": {
|
||||||
|
"for_1_hour": 10,
|
||||||
|
"for_4_hours": 3,
|
||||||
|
"for_8_hours": 2
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Task Chaining
|
||||||
|
|
||||||
|
Tasks can automatically create follow-up tasks:
|
||||||
|
|
||||||
|
```
|
||||||
|
store_discovery → entry_point_discovery → product_discovery
|
||||||
|
↓
|
||||||
|
(store has platform_dispensary_id)
|
||||||
|
↓
|
||||||
|
Daily resync tasks
|
||||||
|
```
|
||||||
|
|
||||||
|
The `chainNextTask()` method handles this automatically.
|
||||||
|
|
||||||
|
## Stale Task Recovery
|
||||||
|
|
||||||
|
Tasks are considered stale if `last_heartbeat_at` is older than the threshold (default 10 minutes).
|
||||||
|
|
||||||
|
```sql
|
||||||
|
SELECT recover_stale_tasks(10); -- 10 minute threshold
|
||||||
|
```
|
||||||
|
|
||||||
|
Or via API:
|
||||||
|
```bash
|
||||||
|
curl -X POST /api/tasks/recover-stale \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-d '{"threshold_minutes": 10}'
|
||||||
|
```
|
||||||
|
|
||||||
|
## Migration from Legacy Systems
|
||||||
|
|
||||||
|
### Legacy Systems Replaced
|
||||||
|
|
||||||
|
1. **job_schedules + job_run_logs** - Scheduled job definitions
|
||||||
|
2. **dispensary_crawl_jobs** - Per-dispensary crawl queue
|
||||||
|
3. **SyncOrchestrator + HydrationWorker** - Raw payload processing
|
||||||
|
|
||||||
|
### Migration Steps
|
||||||
|
|
||||||
|
**Option 1: One-Click Migration**
|
||||||
|
```bash
|
||||||
|
curl -X POST /api/tasks/migration/full-migrate
|
||||||
|
```
|
||||||
|
|
||||||
|
This will:
|
||||||
|
1. Disable all job_schedules
|
||||||
|
2. Cancel pending dispensary_crawl_jobs
|
||||||
|
3. Generate resync tasks for all stores
|
||||||
|
4. Create discovery and analytics tasks
|
||||||
|
|
||||||
|
**Option 2: Manual Migration**
|
||||||
|
```bash
|
||||||
|
# 1. Check current status
|
||||||
|
curl /api/tasks/migration/status
|
||||||
|
|
||||||
|
# 2. Disable old schedules
|
||||||
|
curl -X POST /api/tasks/migration/disable-old-schedules
|
||||||
|
|
||||||
|
# 3. Cancel pending crawl jobs
|
||||||
|
curl -X POST /api/tasks/migration/cancel-pending-crawl-jobs
|
||||||
|
|
||||||
|
# 4. Create resync tasks
|
||||||
|
curl -X POST /api/tasks/migration/create-resync-tasks \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-d '{"state_code": "AZ"}'
|
||||||
|
|
||||||
|
# 5. Generate daily resync schedule
|
||||||
|
curl -X POST /api/tasks/generate/resync \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-d '{"batches_per_day": 6}'
|
||||||
|
```
|
||||||
|
|
||||||
|
## Per-Store Locking
|
||||||
|
|
||||||
|
The system prevents concurrent tasks for the same store using a partial unique index:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
CREATE UNIQUE INDEX idx_worker_tasks_active_dispensary
|
||||||
|
ON worker_tasks (dispensary_id)
|
||||||
|
WHERE dispensary_id IS NOT NULL
|
||||||
|
AND status IN ('claimed', 'running');
|
||||||
|
```
|
||||||
|
|
||||||
|
This ensures only one task can be active per store at any time.
|
||||||
|
|
||||||
|
## Task Priority
|
||||||
|
|
||||||
|
Tasks are claimed in priority order (higher first), then by creation time:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
ORDER BY priority DESC, created_at ASC
|
||||||
|
```
|
||||||
|
|
||||||
|
Default priorities:
|
||||||
|
- `store_discovery`: 0
|
||||||
|
- `entry_point_discovery`: 10 (high - new stores)
|
||||||
|
- `product_discovery`: 10 (high - new stores)
|
||||||
|
- `product_resync`: 0
|
||||||
|
- `analytics_refresh`: 0
|
||||||
|
|
||||||
|
## Scheduled Tasks
|
||||||
|
|
||||||
|
Tasks can be scheduled for future execution:
|
||||||
|
|
||||||
|
```javascript
|
||||||
|
await taskService.createTask({
|
||||||
|
role: 'product_resync',
|
||||||
|
dispensary_id: 123,
|
||||||
|
scheduled_for: new Date('2025-01-10T06:00:00Z'),
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
The `generate_resync_tasks()` function creates staggered tasks throughout the day:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
SELECT generate_resync_tasks(6, '2025-01-10'); -- 6 batches = every 4 hours
|
||||||
|
```
|
||||||
|
|
||||||
|
## Dashboard Integration
|
||||||
|
|
||||||
|
The admin dashboard shows task queue status in the main overview:
|
||||||
|
|
||||||
|
```
|
||||||
|
Task Queue Summary
|
||||||
|
------------------
|
||||||
|
Pending: 45
|
||||||
|
Running: 3
|
||||||
|
Completed: 1,234
|
||||||
|
Failed: 12
|
||||||
|
```
|
||||||
|
|
||||||
|
Full task management is available at `/admin/tasks`.
|
||||||
|
|
||||||
|
## Error Handling
|
||||||
|
|
||||||
|
Failed tasks include the error message in `error_message` and can be retried:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
-- View failed tasks
|
||||||
|
SELECT id, role, dispensary_id, error_message, retry_count
|
||||||
|
FROM worker_tasks
|
||||||
|
WHERE status = 'failed'
|
||||||
|
ORDER BY completed_at DESC
|
||||||
|
LIMIT 20;
|
||||||
|
|
||||||
|
-- Retry failed tasks
|
||||||
|
UPDATE worker_tasks
|
||||||
|
SET status = 'pending', retry_count = retry_count + 1
|
||||||
|
WHERE status = 'failed' AND retry_count < max_retries;
|
||||||
|
```
|
||||||
|
|
||||||
|
## Concurrent Task Processing (Added 2024-12)
|
||||||
|
|
||||||
|
Workers can now process multiple tasks concurrently within a single worker instance. This improves throughput by utilizing async I/O efficiently.
|
||||||
|
|
||||||
|
### Architecture
|
||||||
|
|
||||||
|
```
|
||||||
|
┌─────────────────────────────────────────────────────────────┐
|
||||||
|
│ Pod (K8s) │
|
||||||
|
│ │
|
||||||
|
│ ┌─────────────────────────────────────────────────────┐ │
|
||||||
|
│ │ TaskWorker │ │
|
||||||
|
│ │ │ │
|
||||||
|
│ │ ┌─────────┐ ┌─────────┐ ┌─────────┐ │ │
|
||||||
|
│ │ │ Task 1 │ │ Task 2 │ │ Task 3 │ (concurrent)│ │
|
||||||
|
│ │ └─────────┘ └─────────┘ └─────────┘ │ │
|
||||||
|
│ │ │ │
|
||||||
|
│ │ Resource Monitor │ │
|
||||||
|
│ │ ├── Memory: 65% (threshold: 85%) │ │
|
||||||
|
│ │ ├── CPU: 45% (threshold: 90%) │ │
|
||||||
|
│ │ └── Status: Normal │ │
|
||||||
|
│ └─────────────────────────────────────────────────────┘ │
|
||||||
|
└─────────────────────────────────────────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
### Environment Variables
|
||||||
|
|
||||||
|
| Variable | Default | Description |
|
||||||
|
|----------|---------|-------------|
|
||||||
|
| `MAX_CONCURRENT_TASKS` | 3 | Maximum tasks a worker will run concurrently |
|
||||||
|
| `MEMORY_BACKOFF_THRESHOLD` | 0.85 | Back off when heap memory exceeds 85% |
|
||||||
|
| `CPU_BACKOFF_THRESHOLD` | 0.90 | Back off when CPU exceeds 90% |
|
||||||
|
| `BACKOFF_DURATION_MS` | 10000 | How long to wait when backing off (10s) |
|
||||||
|
|
||||||
|
### How It Works
|
||||||
|
|
||||||
|
1. **Main Loop**: Worker continuously tries to fill up to `MAX_CONCURRENT_TASKS`
|
||||||
|
2. **Resource Monitoring**: Before claiming a new task, worker checks memory and CPU
|
||||||
|
3. **Backoff**: If resources exceed thresholds, worker pauses and stops claiming new tasks
|
||||||
|
4. **Concurrent Execution**: Tasks run in parallel using `Promise` - they don't block each other
|
||||||
|
5. **Graceful Shutdown**: On SIGTERM/decommission, worker stops claiming but waits for active tasks
|
||||||
|
|
||||||
|
### Resource Monitoring
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// ResourceStats interface
|
||||||
|
interface ResourceStats {
|
||||||
|
memoryPercent: number; // Current heap usage as decimal (0.0-1.0)
|
||||||
|
memoryMb: number; // Current heap used in MB
|
||||||
|
memoryTotalMb: number; // Total heap available in MB
|
||||||
|
cpuPercent: number; // CPU usage as percentage (0-100)
|
||||||
|
isBackingOff: boolean; // True if worker is in backoff state
|
||||||
|
backoffReason: string; // Why the worker is backing off
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Heartbeat Data
|
||||||
|
|
||||||
|
Workers report the following in their heartbeat:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"worker_id": "worker-abc123",
|
||||||
|
"current_task_id": 456,
|
||||||
|
"current_task_ids": [456, 457, 458],
|
||||||
|
"active_task_count": 3,
|
||||||
|
"max_concurrent_tasks": 3,
|
||||||
|
"status": "active",
|
||||||
|
"resources": {
|
||||||
|
"memory_mb": 256,
|
||||||
|
"memory_total_mb": 512,
|
||||||
|
"memory_rss_mb": 320,
|
||||||
|
"memory_percent": 50,
|
||||||
|
"cpu_user_ms": 12500,
|
||||||
|
"cpu_system_ms": 3200,
|
||||||
|
"cpu_percent": 45,
|
||||||
|
"is_backing_off": false,
|
||||||
|
"backoff_reason": null
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Backoff Behavior
|
||||||
|
|
||||||
|
When resources exceed thresholds:
|
||||||
|
|
||||||
|
1. Worker logs the backoff reason:
|
||||||
|
```
|
||||||
|
[TaskWorker] MyWorker backing off: Memory at 87.3% (threshold: 85%)
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Worker stops claiming new tasks but continues existing tasks
|
||||||
|
|
||||||
|
3. After `BACKOFF_DURATION_MS`, worker rechecks resources
|
||||||
|
|
||||||
|
4. When resources return to normal:
|
||||||
|
```
|
||||||
|
[TaskWorker] MyWorker resuming normal operation
|
||||||
|
```
|
||||||
|
|
||||||
|
### UI Display
|
||||||
|
|
||||||
|
The Workers Dashboard shows:
|
||||||
|
|
||||||
|
- **Tasks Column**: `2/3 tasks` (active/max concurrent)
|
||||||
|
- **Resources Column**: Memory % and CPU % with color coding
|
||||||
|
- Green: < 50%
|
||||||
|
- Yellow: 50-74%
|
||||||
|
- Amber: 75-89%
|
||||||
|
- Red: 90%+
|
||||||
|
- **Backing Off**: Orange warning badge when worker is in backoff state
|
||||||
|
|
||||||
|
### Task Count Badge Details
|
||||||
|
|
||||||
|
```
|
||||||
|
┌─────────────────────────────────────────────┐
|
||||||
|
│ Worker: "MyWorker" │
|
||||||
|
│ Tasks: 2/3 tasks #456, #457 │
|
||||||
|
│ Resources: 🧠 65% 💻 45% │
|
||||||
|
│ Status: ● Active │
|
||||||
|
└─────────────────────────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
### Best Practices
|
||||||
|
|
||||||
|
1. **Start Conservative**: Use `MAX_CONCURRENT_TASKS=3` initially
|
||||||
|
2. **Monitor Resources**: Watch for frequent backoffs in logs
|
||||||
|
3. **Tune Per Workload**: I/O-bound tasks benefit from higher concurrency
|
||||||
|
4. **Scale Horizontally**: Add more pods rather than cranking concurrency too high
|
||||||
|
|
||||||
|
### Code References
|
||||||
|
|
||||||
|
| File | Purpose |
|
||||||
|
|------|---------|
|
||||||
|
| `src/tasks/task-worker.ts:68-71` | Concurrency environment variables |
|
||||||
|
| `src/tasks/task-worker.ts:104-111` | ResourceStats interface |
|
||||||
|
| `src/tasks/task-worker.ts:149-179` | getResourceStats() method |
|
||||||
|
| `src/tasks/task-worker.ts:184-196` | shouldBackOff() method |
|
||||||
|
| `src/tasks/task-worker.ts:462-516` | mainLoop() with concurrent claiming |
|
||||||
|
| `src/routes/worker-registry.ts:148-195` | Heartbeat endpoint handling |
|
||||||
|
| `cannaiq/src/pages/WorkersDashboard.tsx:233-305` | UI components for resources |
|
||||||
|
|
||||||
|
## Browser Task Memory Limits (Updated 2025-12)
|
||||||
|
|
||||||
|
Browser-based tasks (Puppeteer/Chrome) have strict memory constraints that limit concurrency.
|
||||||
|
|
||||||
|
### Why Browser Tasks Are Different
|
||||||
|
|
||||||
|
Each browser task launches a Chrome process. Unlike I/O-bound API calls, browsers consume significant RAM:
|
||||||
|
|
||||||
|
| Component | RAM Usage |
|
||||||
|
|-----------|-----------|
|
||||||
|
| Node.js runtime | ~150 MB |
|
||||||
|
| Chrome browser (base) | ~200-250 MB |
|
||||||
|
| Dutchie menu page (loaded) | ~100-150 MB |
|
||||||
|
| **Per browser total** | **~350-450 MB** |
|
||||||
|
|
||||||
|
### Memory Math for Pod Limits
|
||||||
|
|
||||||
|
```
|
||||||
|
Pod memory limit: 2 GB (2000 MB)
|
||||||
|
Node.js runtime: -150 MB
|
||||||
|
Safety buffer: -100 MB
|
||||||
|
────────────────────────────────
|
||||||
|
Available for browsers: 1750 MB
|
||||||
|
|
||||||
|
Per browser + page: ~400 MB
|
||||||
|
|
||||||
|
Max browsers: 1750 ÷ 400 = ~4 browsers
|
||||||
|
|
||||||
|
Recommended: 3 browsers (leaves headroom for spikes)
|
||||||
|
```
|
||||||
|
|
||||||
|
### MAX_CONCURRENT_TASKS for Browser Tasks
|
||||||
|
|
||||||
|
| Browsers per Pod | RAM Used | Risk Level |
|
||||||
|
|------------------|----------|------------|
|
||||||
|
| 1 | ~500 MB | Very safe |
|
||||||
|
| 2 | ~900 MB | Safe |
|
||||||
|
| **3** | **~1.3 GB** | **Recommended** |
|
||||||
|
| 4 | ~1.7 GB | Tight (may OOM) |
|
||||||
|
| 5+ | >2 GB | Will OOM crash |
|
||||||
|
|
||||||
|
**CRITICAL**: `MAX_CONCURRENT_TASKS=3` is the maximum safe value for browser tasks with current pod limits.
|
||||||
|
|
||||||
|
### Scaling Strategy
|
||||||
|
|
||||||
|
Scale **horizontally** (more pods) rather than vertically (more concurrency per pod):
|
||||||
|
|
||||||
|
```
|
||||||
|
┌─────────────────────────────────────────────────────────────────────────┐
|
||||||
|
│ Cluster: 8 pods × 3 browsers = 24 concurrent tasks │
|
||||||
|
│ │
|
||||||
|
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
|
||||||
|
│ │ Pod 0 │ │ Pod 1 │ │ Pod 2 │ │ Pod 3 │ │
|
||||||
|
│ │ 3 browsers │ │ 3 browsers │ │ 3 browsers │ │ 3 browsers │ │
|
||||||
|
│ └─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘ │
|
||||||
|
│ │
|
||||||
|
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
|
||||||
|
│ │ Pod 4 │ │ Pod 5 │ │ Pod 6 │ │ Pod 7 │ │
|
||||||
|
│ │ 3 browsers │ │ 3 browsers │ │ 3 browsers │ │ 3 browsers │ │
|
||||||
|
│ └─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘ │
|
||||||
|
└─────────────────────────────────────────────────────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
### Browser Lifecycle Per Task
|
||||||
|
|
||||||
|
Each task gets a fresh browser with fresh IP/identity:
|
||||||
|
|
||||||
|
```
|
||||||
|
1. Claim task from queue
|
||||||
|
2. Get fresh proxy from pool
|
||||||
|
3. Launch browser with proxy
|
||||||
|
4. Run preflight (verify IP)
|
||||||
|
5. Execute scrape
|
||||||
|
6. Close browser
|
||||||
|
7. Repeat
|
||||||
|
```
|
||||||
|
|
||||||
|
This ensures:
|
||||||
|
- Fresh IP per task (proxy rotation)
|
||||||
|
- Fresh fingerprint per task (UA rotation)
|
||||||
|
- No cookie/session bleed between tasks
|
||||||
|
- Predictable memory usage
|
||||||
|
|
||||||
|
### Increasing Capacity
|
||||||
|
|
||||||
|
To handle more concurrent tasks:
|
||||||
|
|
||||||
|
1. **Add more pods** (up to 8 per CLAUDE.md limit)
|
||||||
|
2. **Increase pod memory** (allows 4 browsers per pod):
|
||||||
|
```yaml
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
memory: "2.5Gi" # from 2Gi
|
||||||
|
```
|
||||||
|
|
||||||
|
**DO NOT** simply increase `MAX_CONCURRENT_TASKS` without also increasing pod memory limits.
|
||||||
|
|
||||||
|
## Monitoring
|
||||||
|
|
||||||
|
### Logs
|
||||||
|
|
||||||
|
Workers log to stdout:
|
||||||
|
```
|
||||||
|
[TaskWorker] Starting worker worker-product_resync-a1b2c3d4 for role: product_resync
|
||||||
|
[TaskWorker] Claimed task 123 (product_resync) for dispensary 456
|
||||||
|
[TaskWorker] Task 123 completed successfully
|
||||||
|
```
|
||||||
|
|
||||||
|
### Health Check
|
||||||
|
|
||||||
|
Check if workers are active:
|
||||||
|
```sql
|
||||||
|
SELECT worker_id, role, COUNT(*), MAX(last_heartbeat_at)
|
||||||
|
FROM worker_tasks
|
||||||
|
WHERE last_heartbeat_at > NOW() - INTERVAL '5 minutes'
|
||||||
|
GROUP BY worker_id, role;
|
||||||
|
```
|
||||||
|
|
||||||
|
### Metrics
|
||||||
|
|
||||||
|
```sql
|
||||||
|
-- Tasks by status
|
||||||
|
SELECT status, COUNT(*) FROM worker_tasks GROUP BY status;
|
||||||
|
|
||||||
|
-- Tasks by role
|
||||||
|
SELECT role, status, COUNT(*) FROM worker_tasks GROUP BY role, status;
|
||||||
|
|
||||||
|
-- Average duration by role
|
||||||
|
SELECT role, AVG(EXTRACT(EPOCH FROM (completed_at - started_at))) as avg_seconds
|
||||||
|
FROM worker_tasks
|
||||||
|
WHERE status = 'completed' AND completed_at > NOW() - INTERVAL '24 hours'
|
||||||
|
GROUP BY role;
|
||||||
|
```
|
||||||
69
backend/k8s/cronjob-ip2location.yaml
Normal file
69
backend/k8s/cronjob-ip2location.yaml
Normal file
@@ -0,0 +1,69 @@
|
|||||||
|
apiVersion: batch/v1
|
||||||
|
kind: CronJob
|
||||||
|
metadata:
|
||||||
|
name: ip2location-update
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
# Run on the 1st of every month at 3am UTC
|
||||||
|
schedule: "0 3 1 * *"
|
||||||
|
concurrencyPolicy: Forbid
|
||||||
|
successfulJobsHistoryLimit: 3
|
||||||
|
failedJobsHistoryLimit: 3
|
||||||
|
jobTemplate:
|
||||||
|
spec:
|
||||||
|
template:
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: ip2location-updater
|
||||||
|
image: curlimages/curl:latest
|
||||||
|
command:
|
||||||
|
- /bin/sh
|
||||||
|
- -c
|
||||||
|
- |
|
||||||
|
set -e
|
||||||
|
echo "Downloading IP2Location LITE DB5..."
|
||||||
|
|
||||||
|
# Download to temp
|
||||||
|
cd /tmp
|
||||||
|
curl -L -o ip2location.zip "https://www.ip2location.com/download/?token=${IP2LOCATION_TOKEN}&file=DB5LITEBIN"
|
||||||
|
|
||||||
|
# Extract
|
||||||
|
unzip -o ip2location.zip
|
||||||
|
|
||||||
|
# Find and copy the BIN file
|
||||||
|
BIN_FILE=$(ls *.BIN 2>/dev/null | head -1)
|
||||||
|
if [ -z "$BIN_FILE" ]; then
|
||||||
|
echo "ERROR: No BIN file found"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Copy to shared volume
|
||||||
|
cp "$BIN_FILE" /data/IP2LOCATION-LITE-DB5.BIN
|
||||||
|
|
||||||
|
echo "Done! Database updated: /data/IP2LOCATION-LITE-DB5.BIN"
|
||||||
|
env:
|
||||||
|
- name: IP2LOCATION_TOKEN
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: dutchie-backend-secret
|
||||||
|
key: IP2LOCATION_TOKEN
|
||||||
|
volumeMounts:
|
||||||
|
- name: ip2location-data
|
||||||
|
mountPath: /data
|
||||||
|
restartPolicy: OnFailure
|
||||||
|
volumes:
|
||||||
|
- name: ip2location-data
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: ip2location-pvc
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: PersistentVolumeClaim
|
||||||
|
metadata:
|
||||||
|
name: ip2location-pvc
|
||||||
|
namespace: default
|
||||||
|
spec:
|
||||||
|
accessModes:
|
||||||
|
- ReadWriteOnce
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
storage: 100Mi
|
||||||
@@ -26,6 +26,12 @@ spec:
|
|||||||
name: dutchie-backend-config
|
name: dutchie-backend-config
|
||||||
- secretRef:
|
- secretRef:
|
||||||
name: dutchie-backend-secret
|
name: dutchie-backend-secret
|
||||||
|
env:
|
||||||
|
- name: IP2LOCATION_DB_PATH
|
||||||
|
value: /data/ip2location/IP2LOCATION-LITE-DB5.BIN
|
||||||
|
volumeMounts:
|
||||||
|
- name: ip2location-data
|
||||||
|
mountPath: /data/ip2location
|
||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
memory: "256Mi"
|
memory: "256Mi"
|
||||||
@@ -45,3 +51,7 @@ spec:
|
|||||||
port: 3010
|
port: 3010
|
||||||
initialDelaySeconds: 5
|
initialDelaySeconds: 5
|
||||||
periodSeconds: 5
|
periodSeconds: 5
|
||||||
|
volumes:
|
||||||
|
- name: ip2location-data
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: ip2location-pvc
|
||||||
|
|||||||
77
backend/k8s/scraper-worker-statefulset.yaml
Normal file
77
backend/k8s/scraper-worker-statefulset.yaml
Normal file
@@ -0,0 +1,77 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: scraper-worker
|
||||||
|
namespace: cannaiq
|
||||||
|
labels:
|
||||||
|
app: scraper-worker
|
||||||
|
spec:
|
||||||
|
clusterIP: None # Headless service required for StatefulSet
|
||||||
|
selector:
|
||||||
|
app: scraper-worker
|
||||||
|
ports:
|
||||||
|
- port: 3010
|
||||||
|
name: http
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: StatefulSet
|
||||||
|
metadata:
|
||||||
|
name: scraper-worker
|
||||||
|
namespace: cannaiq
|
||||||
|
spec:
|
||||||
|
serviceName: scraper-worker
|
||||||
|
replicas: 8
|
||||||
|
podManagementPolicy: Parallel # Start all pods at once
|
||||||
|
updateStrategy:
|
||||||
|
type: OnDelete # Pods only update when manually deleted - no automatic restarts
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: scraper-worker
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: scraper-worker
|
||||||
|
spec:
|
||||||
|
terminationGracePeriodSeconds: 60
|
||||||
|
imagePullSecrets:
|
||||||
|
- name: regcred
|
||||||
|
containers:
|
||||||
|
- name: worker
|
||||||
|
image: git.spdy.io/creationshop/cannaiq:latest
|
||||||
|
imagePullPolicy: Always
|
||||||
|
command: ["node"]
|
||||||
|
args: ["dist/tasks/task-worker.js"]
|
||||||
|
env:
|
||||||
|
- name: WORKER_MODE
|
||||||
|
value: "true"
|
||||||
|
- name: POD_NAME
|
||||||
|
valueFrom:
|
||||||
|
fieldRef:
|
||||||
|
fieldPath: metadata.name
|
||||||
|
- name: MAX_CONCURRENT_TASKS
|
||||||
|
value: "50"
|
||||||
|
- name: API_BASE_URL
|
||||||
|
value: http://scraper
|
||||||
|
- name: NODE_OPTIONS
|
||||||
|
value: --max-old-space-size=1500
|
||||||
|
envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: scraper-config
|
||||||
|
- secretRef:
|
||||||
|
name: scraper-secrets
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
cpu: 100m
|
||||||
|
memory: 1Gi
|
||||||
|
limits:
|
||||||
|
cpu: 500m
|
||||||
|
memory: 2Gi
|
||||||
|
livenessProbe:
|
||||||
|
exec:
|
||||||
|
command:
|
||||||
|
- /bin/sh
|
||||||
|
- -c
|
||||||
|
- pgrep -f 'task-worker' > /dev/null
|
||||||
|
initialDelaySeconds: 10
|
||||||
|
periodSeconds: 30
|
||||||
|
failureThreshold: 3
|
||||||
@@ -1,18 +1,18 @@
|
|||||||
-- Add location columns to proxies table
|
-- Add location columns to proxies table
|
||||||
ALTER TABLE proxies
|
ALTER TABLE proxies
|
||||||
ADD COLUMN city VARCHAR(100),
|
ADD COLUMN IF NOT EXISTS city VARCHAR(100),
|
||||||
ADD COLUMN state VARCHAR(100),
|
ADD COLUMN IF NOT EXISTS state VARCHAR(100),
|
||||||
ADD COLUMN country VARCHAR(100),
|
ADD COLUMN IF NOT EXISTS country VARCHAR(100),
|
||||||
ADD COLUMN country_code VARCHAR(2),
|
ADD COLUMN IF NOT EXISTS country_code VARCHAR(2),
|
||||||
ADD COLUMN location_updated_at TIMESTAMP;
|
ADD COLUMN IF NOT EXISTS location_updated_at TIMESTAMP;
|
||||||
|
|
||||||
-- Add index for location-based queries
|
-- Add index for location-based queries
|
||||||
CREATE INDEX idx_proxies_location ON proxies(country_code, state, city);
|
CREATE INDEX IF NOT EXISTS idx_proxies_location ON proxies(country_code, state, city);
|
||||||
|
|
||||||
-- Add the same to failed_proxies table
|
-- Add the same to failed_proxies table
|
||||||
ALTER TABLE failed_proxies
|
ALTER TABLE failed_proxies
|
||||||
ADD COLUMN city VARCHAR(100),
|
ADD COLUMN IF NOT EXISTS city VARCHAR(100),
|
||||||
ADD COLUMN state VARCHAR(100),
|
ADD COLUMN IF NOT EXISTS state VARCHAR(100),
|
||||||
ADD COLUMN country VARCHAR(100),
|
ADD COLUMN IF NOT EXISTS country VARCHAR(100),
|
||||||
ADD COLUMN country_code VARCHAR(2),
|
ADD COLUMN IF NOT EXISTS country_code VARCHAR(2),
|
||||||
ADD COLUMN location_updated_at TIMESTAMP;
|
ADD COLUMN IF NOT EXISTS location_updated_at TIMESTAMP;
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
-- Create dispensaries table as single source of truth
|
-- Create dispensaries table as single source of truth
|
||||||
-- This consolidates azdhs_list (official data) + stores (menu data) into one table
|
-- This consolidates azdhs_list (official data) + stores (menu data) into one table
|
||||||
CREATE TABLE dispensaries (
|
CREATE TABLE IF NOT EXISTS dispensaries (
|
||||||
-- Primary key
|
-- Primary key
|
||||||
id SERIAL PRIMARY KEY,
|
id SERIAL PRIMARY KEY,
|
||||||
|
|
||||||
@@ -43,11 +43,11 @@ CREATE TABLE dispensaries (
|
|||||||
);
|
);
|
||||||
|
|
||||||
-- Create indexes for common queries
|
-- Create indexes for common queries
|
||||||
CREATE INDEX idx_dispensaries_city ON dispensaries(city);
|
CREATE INDEX IF NOT EXISTS idx_dispensaries_city ON dispensaries(city);
|
||||||
CREATE INDEX idx_dispensaries_state ON dispensaries(state);
|
CREATE INDEX IF NOT EXISTS idx_dispensaries_state ON dispensaries(state);
|
||||||
CREATE INDEX idx_dispensaries_slug ON dispensaries(slug);
|
CREATE INDEX IF NOT EXISTS idx_dispensaries_slug ON dispensaries(slug);
|
||||||
CREATE INDEX idx_dispensaries_azdhs_id ON dispensaries(azdhs_id);
|
CREATE INDEX IF NOT EXISTS idx_dispensaries_azdhs_id ON dispensaries(azdhs_id);
|
||||||
CREATE INDEX idx_dispensaries_menu_status ON dispensaries(menu_scrape_status);
|
CREATE INDEX IF NOT EXISTS idx_dispensaries_menu_status ON dispensaries(menu_scrape_status);
|
||||||
|
|
||||||
-- Create index for location-based queries
|
-- Create index for location-based queries
|
||||||
CREATE INDEX idx_dispensaries_location ON dispensaries(latitude, longitude) WHERE latitude IS NOT NULL AND longitude IS NOT NULL;
|
CREATE INDEX IF NOT EXISTS idx_dispensaries_location ON dispensaries(latitude, longitude) WHERE latitude IS NOT NULL AND longitude IS NOT NULL;
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
-- Create dispensary_changes table for change approval workflow
|
-- Create dispensary_changes table for change approval workflow
|
||||||
-- This protects against accidental data destruction by requiring manual review
|
-- This protects against accidental data destruction by requiring manual review
|
||||||
CREATE TABLE dispensary_changes (
|
CREATE TABLE IF NOT EXISTS dispensary_changes (
|
||||||
id SERIAL PRIMARY KEY,
|
id SERIAL PRIMARY KEY,
|
||||||
dispensary_id INTEGER NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE,
|
dispensary_id INTEGER NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE,
|
||||||
|
|
||||||
@@ -26,10 +26,10 @@ CREATE TABLE dispensary_changes (
|
|||||||
);
|
);
|
||||||
|
|
||||||
-- Create indexes for common queries
|
-- Create indexes for common queries
|
||||||
CREATE INDEX idx_dispensary_changes_status ON dispensary_changes(status);
|
CREATE INDEX IF NOT EXISTS idx_dispensary_changes_status ON dispensary_changes(status);
|
||||||
CREATE INDEX idx_dispensary_changes_dispensary_status ON dispensary_changes(dispensary_id, status);
|
CREATE INDEX IF NOT EXISTS idx_dispensary_changes_dispensary_status ON dispensary_changes(dispensary_id, status);
|
||||||
CREATE INDEX idx_dispensary_changes_created_at ON dispensary_changes(created_at DESC);
|
CREATE INDEX IF NOT EXISTS idx_dispensary_changes_created_at ON dispensary_changes(created_at DESC);
|
||||||
CREATE INDEX idx_dispensary_changes_requires_recrawl ON dispensary_changes(requires_recrawl) WHERE requires_recrawl = TRUE;
|
CREATE INDEX IF NOT EXISTS idx_dispensary_changes_requires_recrawl ON dispensary_changes(requires_recrawl) WHERE requires_recrawl = TRUE;
|
||||||
|
|
||||||
-- Create function to automatically set requires_recrawl for website/menu_url changes
|
-- Create function to automatically set requires_recrawl for website/menu_url changes
|
||||||
CREATE OR REPLACE FUNCTION set_requires_recrawl()
|
CREATE OR REPLACE FUNCTION set_requires_recrawl()
|
||||||
@@ -42,7 +42,8 @@ BEGIN
|
|||||||
END;
|
END;
|
||||||
$$ LANGUAGE plpgsql;
|
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
-- Create trigger to call the function
|
-- Create trigger to call the function (drop first to make idempotent)
|
||||||
|
DROP TRIGGER IF EXISTS trigger_set_requires_recrawl ON dispensary_changes;
|
||||||
CREATE TRIGGER trigger_set_requires_recrawl
|
CREATE TRIGGER trigger_set_requires_recrawl
|
||||||
BEFORE INSERT ON dispensary_changes
|
BEFORE INSERT ON dispensary_changes
|
||||||
FOR EACH ROW
|
FOR EACH ROW
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
-- Populate dispensaries table from azdhs_list
|
-- Populate dispensaries table from azdhs_list
|
||||||
-- This migrates all 182 AZDHS records with their enriched Google Maps data
|
-- This migrates all 182 AZDHS records with their enriched Google Maps data
|
||||||
-- For multi-location dispensaries with duplicate slugs, append city name to make unique
|
-- For multi-location dispensaries with duplicate slugs, append city name to make unique
|
||||||
|
-- IDEMPOTENT: Uses ON CONFLICT DO NOTHING to skip already-imported records
|
||||||
|
|
||||||
WITH ranked_dispensaries AS (
|
WITH ranked_dispensaries AS (
|
||||||
SELECT
|
SELECT
|
||||||
@@ -78,9 +79,10 @@ SELECT
|
|||||||
created_at,
|
created_at,
|
||||||
updated_at
|
updated_at
|
||||||
FROM ranked_dispensaries
|
FROM ranked_dispensaries
|
||||||
ORDER BY id;
|
ORDER BY id
|
||||||
|
ON CONFLICT (azdhs_id) DO NOTHING;
|
||||||
|
|
||||||
-- Verify the migration
|
-- Verify the migration (idempotent - just logs, doesn't fail)
|
||||||
DO $$
|
DO $$
|
||||||
DECLARE
|
DECLARE
|
||||||
source_count INTEGER;
|
source_count INTEGER;
|
||||||
@@ -89,9 +91,11 @@ BEGIN
|
|||||||
SELECT COUNT(*) INTO source_count FROM azdhs_list;
|
SELECT COUNT(*) INTO source_count FROM azdhs_list;
|
||||||
SELECT COUNT(*) INTO dest_count FROM dispensaries;
|
SELECT COUNT(*) INTO dest_count FROM dispensaries;
|
||||||
|
|
||||||
RAISE NOTICE 'Migration complete: % records from azdhs_list → % records in dispensaries', source_count, dest_count;
|
RAISE NOTICE 'Migration status: % records in azdhs_list, % records in dispensaries', source_count, dest_count;
|
||||||
|
|
||||||
IF source_count != dest_count THEN
|
IF dest_count >= source_count THEN
|
||||||
RAISE EXCEPTION 'Record count mismatch! Expected %, got %', source_count, dest_count;
|
RAISE NOTICE 'OK: dispensaries table has expected records';
|
||||||
|
ELSE
|
||||||
|
RAISE WARNING 'dispensaries has fewer records than azdhs_list (% vs %)', dest_count, source_count;
|
||||||
END IF;
|
END IF;
|
||||||
END $$;
|
END $$;
|
||||||
|
|||||||
@@ -3,15 +3,15 @@
|
|||||||
|
|
||||||
-- Add dispensary_id to products table
|
-- Add dispensary_id to products table
|
||||||
ALTER TABLE products
|
ALTER TABLE products
|
||||||
ADD COLUMN dispensary_id INTEGER REFERENCES dispensaries(id) ON DELETE CASCADE;
|
ADD COLUMN IF NOT EXISTS dispensary_id INTEGER REFERENCES dispensaries(id) ON DELETE CASCADE;
|
||||||
|
|
||||||
-- Add dispensary_id to categories table
|
-- Add dispensary_id to categories table
|
||||||
ALTER TABLE categories
|
ALTER TABLE categories
|
||||||
ADD COLUMN dispensary_id INTEGER REFERENCES dispensaries(id) ON DELETE CASCADE;
|
ADD COLUMN IF NOT EXISTS dispensary_id INTEGER REFERENCES dispensaries(id) ON DELETE CASCADE;
|
||||||
|
|
||||||
-- Create indexes for the new foreign keys
|
-- Create indexes for the new foreign keys
|
||||||
CREATE INDEX idx_products_dispensary_id ON products(dispensary_id);
|
CREATE INDEX IF NOT EXISTS idx_products_dispensary_id ON products(dispensary_id);
|
||||||
CREATE INDEX idx_categories_dispensary_id ON categories(dispensary_id);
|
CREATE INDEX IF NOT EXISTS idx_categories_dispensary_id ON categories(dispensary_id);
|
||||||
|
|
||||||
-- NOTE: We'll populate these FKs and migrate data from stores in a separate data migration
|
-- NOTE: We'll populate these FKs and migrate data from stores in a separate data migration
|
||||||
-- For now, new scrapers should use dispensary_id, but old store_id still works
|
-- For now, new scrapers should use dispensary_id, but old store_id still works
|
||||||
|
|||||||
119
backend/migrations/051_worker_definitions.sql
Normal file
119
backend/migrations/051_worker_definitions.sql
Normal file
@@ -0,0 +1,119 @@
|
|||||||
|
-- Migration 051: Worker Definitions
|
||||||
|
-- Creates a dedicated workers table for named workers with roles and assignments
|
||||||
|
|
||||||
|
-- Workers table - defines named workers with roles
|
||||||
|
CREATE TABLE IF NOT EXISTS workers (
|
||||||
|
id SERIAL PRIMARY KEY,
|
||||||
|
name VARCHAR(100) NOT NULL UNIQUE,
|
||||||
|
role VARCHAR(100) NOT NULL,
|
||||||
|
description TEXT,
|
||||||
|
enabled BOOLEAN DEFAULT TRUE,
|
||||||
|
|
||||||
|
-- Schedule configuration (for dedicated crawl workers)
|
||||||
|
schedule_type VARCHAR(50) DEFAULT 'interval', -- 'interval', 'cron', 'manual'
|
||||||
|
interval_minutes INTEGER DEFAULT 240,
|
||||||
|
cron_expression VARCHAR(100), -- e.g., '0 */4 * * *'
|
||||||
|
jitter_minutes INTEGER DEFAULT 30,
|
||||||
|
|
||||||
|
-- Assignment scope
|
||||||
|
assignment_type VARCHAR(50) DEFAULT 'all', -- 'all', 'state', 'dispensary', 'chain'
|
||||||
|
assigned_state_codes TEXT[], -- e.g., ['AZ', 'CA']
|
||||||
|
assigned_dispensary_ids INTEGER[],
|
||||||
|
assigned_chain_ids INTEGER[],
|
||||||
|
|
||||||
|
-- Job configuration
|
||||||
|
job_type VARCHAR(50) NOT NULL DEFAULT 'dutchie_product_crawl',
|
||||||
|
job_config JSONB DEFAULT '{}',
|
||||||
|
priority INTEGER DEFAULT 0,
|
||||||
|
max_concurrent INTEGER DEFAULT 1,
|
||||||
|
|
||||||
|
-- Status tracking
|
||||||
|
status VARCHAR(50) DEFAULT 'idle', -- 'idle', 'running', 'paused', 'error'
|
||||||
|
last_run_at TIMESTAMPTZ,
|
||||||
|
last_status VARCHAR(50),
|
||||||
|
last_error TEXT,
|
||||||
|
last_duration_ms INTEGER,
|
||||||
|
next_run_at TIMESTAMPTZ,
|
||||||
|
current_job_id INTEGER,
|
||||||
|
|
||||||
|
-- Metrics
|
||||||
|
total_runs INTEGER DEFAULT 0,
|
||||||
|
successful_runs INTEGER DEFAULT 0,
|
||||||
|
failed_runs INTEGER DEFAULT 0,
|
||||||
|
avg_duration_ms INTEGER,
|
||||||
|
|
||||||
|
created_at TIMESTAMPTZ DEFAULT NOW(),
|
||||||
|
updated_at TIMESTAMPTZ DEFAULT NOW()
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Worker run history
|
||||||
|
CREATE TABLE IF NOT EXISTS worker_runs (
|
||||||
|
id SERIAL PRIMARY KEY,
|
||||||
|
worker_id INTEGER NOT NULL REFERENCES workers(id) ON DELETE CASCADE,
|
||||||
|
started_at TIMESTAMPTZ DEFAULT NOW(),
|
||||||
|
completed_at TIMESTAMPTZ,
|
||||||
|
status VARCHAR(50) DEFAULT 'running', -- 'running', 'success', 'error', 'cancelled'
|
||||||
|
duration_ms INTEGER,
|
||||||
|
|
||||||
|
-- What was processed
|
||||||
|
jobs_created INTEGER DEFAULT 0,
|
||||||
|
jobs_completed INTEGER DEFAULT 0,
|
||||||
|
jobs_failed INTEGER DEFAULT 0,
|
||||||
|
dispensaries_crawled INTEGER DEFAULT 0,
|
||||||
|
products_found INTEGER DEFAULT 0,
|
||||||
|
|
||||||
|
error_message TEXT,
|
||||||
|
metadata JSONB DEFAULT '{}',
|
||||||
|
|
||||||
|
created_at TIMESTAMPTZ DEFAULT NOW()
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Index for efficient lookups
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_workers_enabled ON workers(enabled) WHERE enabled = TRUE;
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_workers_next_run ON workers(next_run_at) WHERE enabled = TRUE;
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_workers_status ON workers(status);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_worker_runs_worker_id ON worker_runs(worker_id);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_worker_runs_started_at ON worker_runs(started_at DESC);
|
||||||
|
|
||||||
|
-- Add worker_id to dispensary_crawl_jobs if not exists
|
||||||
|
DO $$
|
||||||
|
BEGIN
|
||||||
|
IF NOT EXISTS (
|
||||||
|
SELECT 1 FROM information_schema.columns
|
||||||
|
WHERE table_name = 'dispensary_crawl_jobs' AND column_name = 'assigned_worker_id'
|
||||||
|
) THEN
|
||||||
|
ALTER TABLE dispensary_crawl_jobs ADD COLUMN assigned_worker_id INTEGER REFERENCES workers(id);
|
||||||
|
END IF;
|
||||||
|
END $$;
|
||||||
|
|
||||||
|
-- Migrate existing job_schedules workers to new workers table
|
||||||
|
INSERT INTO workers (name, role, description, enabled, interval_minutes, jitter_minutes, job_type, job_config, last_run_at, last_status, last_error, last_duration_ms, next_run_at)
|
||||||
|
SELECT
|
||||||
|
worker_name,
|
||||||
|
worker_role,
|
||||||
|
description,
|
||||||
|
enabled,
|
||||||
|
base_interval_minutes,
|
||||||
|
jitter_minutes,
|
||||||
|
job_name,
|
||||||
|
job_config,
|
||||||
|
last_run_at,
|
||||||
|
last_status,
|
||||||
|
last_error_message,
|
||||||
|
last_duration_ms,
|
||||||
|
next_run_at
|
||||||
|
FROM job_schedules
|
||||||
|
WHERE worker_name IS NOT NULL
|
||||||
|
ON CONFLICT (name) DO UPDATE SET
|
||||||
|
updated_at = NOW();
|
||||||
|
|
||||||
|
-- Available worker roles (reference)
|
||||||
|
COMMENT ON TABLE workers IS 'Named workers with specific roles and assignments. Roles include:
|
||||||
|
- product_sync: Crawls products from dispensary menus
|
||||||
|
- store_discovery: Discovers new dispensary locations
|
||||||
|
- entry_point_finder: Detects menu providers and resolves platform IDs
|
||||||
|
- analytics_refresh: Refreshes materialized views and analytics
|
||||||
|
- price_monitor: Monitors price changes and triggers alerts
|
||||||
|
- inventory_sync: Syncs inventory levels
|
||||||
|
- image_processor: Downloads and processes product images
|
||||||
|
- data_validator: Validates data integrity';
|
||||||
49
backend/migrations/052_seo_settings.sql
Normal file
49
backend/migrations/052_seo_settings.sql
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
-- Migration 052: SEO Settings Table
|
||||||
|
-- Key/value store for SEO Orchestrator configuration
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS seo_settings (
|
||||||
|
id SERIAL PRIMARY KEY,
|
||||||
|
key TEXT UNIQUE NOT NULL,
|
||||||
|
value JSONB NOT NULL,
|
||||||
|
created_at TIMESTAMP DEFAULT NOW(),
|
||||||
|
updated_at TIMESTAMP DEFAULT NOW()
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Create index on key for fast lookups
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_seo_settings_key ON seo_settings(key);
|
||||||
|
|
||||||
|
-- Seed with default settings
|
||||||
|
INSERT INTO seo_settings (key, value) VALUES
|
||||||
|
-- Section 1: Global Content Generation Settings
|
||||||
|
('primary_prompt_template', '"You are a cannabis industry content expert. Generate SEO-optimized content for {{page_type}} pages about {{subject}}. Focus on: {{focus_areas}}. Maintain a {{tone}} tone and keep content {{length}}."'),
|
||||||
|
('regeneration_prompt_template', '"Regenerate the following SEO content with fresh perspectives. Original topic: {{subject}}. Improve upon: {{improvement_areas}}. Maintain compliance with cannabis industry standards."'),
|
||||||
|
('default_content_length', '"medium"'),
|
||||||
|
('tone_voice', '"informational"'),
|
||||||
|
|
||||||
|
-- Section 2: Automatic Refresh Rules
|
||||||
|
('auto_refresh_interval', '"weekly"'),
|
||||||
|
('trigger_pct_product_change', 'true'),
|
||||||
|
('trigger_pct_brand_change', 'true'),
|
||||||
|
('trigger_new_stores', 'true'),
|
||||||
|
('trigger_market_shift', 'false'),
|
||||||
|
('webhook_url', '""'),
|
||||||
|
('notify_on_trigger', 'false'),
|
||||||
|
|
||||||
|
-- Section 3: Page-Level Defaults
|
||||||
|
('default_title_template', '"{{state_name}} Dispensaries | Find Cannabis Near You | CannaiQ"'),
|
||||||
|
('default_meta_description_template', '"Discover the best dispensaries in {{state_name}}. Browse {{dispensary_count}}+ licensed retailers, compare prices, and find cannabis products near you."'),
|
||||||
|
('default_slug_template', '"dispensaries-{{state_code_lower}}"'),
|
||||||
|
('default_og_image_template', '"/images/seo/og-{{state_code_lower}}.jpg"'),
|
||||||
|
('enable_ai_images', 'false'),
|
||||||
|
|
||||||
|
-- Section 4: Crawl / Dataset Configuration
|
||||||
|
('primary_data_provider', '"cannaiq"'),
|
||||||
|
('fallback_data_provider', '"dutchie"'),
|
||||||
|
('min_data_freshness_hours', '24'),
|
||||||
|
('stale_data_behavior', '"allow_with_warning"')
|
||||||
|
ON CONFLICT (key) DO NOTHING;
|
||||||
|
|
||||||
|
-- Record migration
|
||||||
|
INSERT INTO schema_migrations (version, name, applied_at)
|
||||||
|
VALUES ('052', 'seo_settings', NOW())
|
||||||
|
ON CONFLICT (version) DO NOTHING;
|
||||||
@@ -0,0 +1,42 @@
|
|||||||
|
-- Migration 057: Add crawl_enabled and dutchie_verified fields to dispensaries
|
||||||
|
--
|
||||||
|
-- Purpose:
|
||||||
|
-- 1. Add crawl_enabled to control which dispensaries get crawled
|
||||||
|
-- 2. Add dutchie_verified to track Dutchie source-of-truth verification
|
||||||
|
-- 3. Default existing records to crawl_enabled = TRUE to preserve behavior
|
||||||
|
--
|
||||||
|
-- After this migration, run the harmonization script to:
|
||||||
|
-- - Match dispensaries to Dutchie discoveries
|
||||||
|
-- - Update platform_dispensary_id from Dutchie
|
||||||
|
-- - Set dutchie_verified = TRUE for matches
|
||||||
|
-- - Set crawl_enabled = FALSE for unverified records
|
||||||
|
|
||||||
|
-- Add crawl_enabled column (defaults to true to not break existing crawls)
|
||||||
|
ALTER TABLE dispensaries
|
||||||
|
ADD COLUMN IF NOT EXISTS crawl_enabled BOOLEAN DEFAULT TRUE;
|
||||||
|
|
||||||
|
-- Add dutchie_verified column to track if record is verified against Dutchie
|
||||||
|
ALTER TABLE dispensaries
|
||||||
|
ADD COLUMN IF NOT EXISTS dutchie_verified BOOLEAN DEFAULT FALSE;
|
||||||
|
|
||||||
|
-- Add dutchie_verified_at timestamp
|
||||||
|
ALTER TABLE dispensaries
|
||||||
|
ADD COLUMN IF NOT EXISTS dutchie_verified_at TIMESTAMP WITH TIME ZONE;
|
||||||
|
|
||||||
|
-- Add dutchie_discovery_id to link back to the discovery record
|
||||||
|
ALTER TABLE dispensaries
|
||||||
|
ADD COLUMN IF NOT EXISTS dutchie_discovery_id BIGINT REFERENCES dutchie_discovery_locations(id);
|
||||||
|
|
||||||
|
-- Create index for crawl queries (only crawl enabled dispensaries)
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_dispensaries_crawl_enabled
|
||||||
|
ON dispensaries(crawl_enabled, state)
|
||||||
|
WHERE crawl_enabled = TRUE;
|
||||||
|
|
||||||
|
-- Create index for dutchie verification status
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_dispensaries_dutchie_verified
|
||||||
|
ON dispensaries(dutchie_verified, state);
|
||||||
|
|
||||||
|
COMMENT ON COLUMN dispensaries.crawl_enabled IS 'Whether this dispensary should be included in crawl jobs. Set to FALSE for unverified or problematic records.';
|
||||||
|
COMMENT ON COLUMN dispensaries.dutchie_verified IS 'Whether this dispensary has been verified against Dutchie source of truth (matched by slug or manually linked).';
|
||||||
|
COMMENT ON COLUMN dispensaries.dutchie_verified_at IS 'Timestamp when Dutchie verification was completed.';
|
||||||
|
COMMENT ON COLUMN dispensaries.dutchie_discovery_id IS 'Link to the dutchie_discovery_locations record this was matched/verified against.';
|
||||||
56
backend/migrations/065_slug_verification_tracking.sql
Normal file
56
backend/migrations/065_slug_verification_tracking.sql
Normal file
@@ -0,0 +1,56 @@
|
|||||||
|
-- Migration 065: Slug verification and data source tracking
|
||||||
|
-- Adds columns to track when slug/menu data was verified and from what source
|
||||||
|
|
||||||
|
-- Add slug verification columns to dispensaries
|
||||||
|
ALTER TABLE dispensaries
|
||||||
|
ADD COLUMN IF NOT EXISTS slug_source VARCHAR(50),
|
||||||
|
ADD COLUMN IF NOT EXISTS slug_verified_at TIMESTAMPTZ,
|
||||||
|
ADD COLUMN IF NOT EXISTS slug_status VARCHAR(20) DEFAULT 'unverified',
|
||||||
|
ADD COLUMN IF NOT EXISTS menu_url_source VARCHAR(50),
|
||||||
|
ADD COLUMN IF NOT EXISTS menu_url_verified_at TIMESTAMPTZ,
|
||||||
|
ADD COLUMN IF NOT EXISTS platform_id_source VARCHAR(50),
|
||||||
|
ADD COLUMN IF NOT EXISTS platform_id_verified_at TIMESTAMPTZ,
|
||||||
|
ADD COLUMN IF NOT EXISTS country VARCHAR(2) DEFAULT 'US';
|
||||||
|
|
||||||
|
-- Add index for finding unverified stores
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_dispensaries_slug_status
|
||||||
|
ON dispensaries(slug_status)
|
||||||
|
WHERE slug_status != 'verified';
|
||||||
|
|
||||||
|
-- Add index for country
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_dispensaries_country
|
||||||
|
ON dispensaries(country);
|
||||||
|
|
||||||
|
-- Comment on columns
|
||||||
|
COMMENT ON COLUMN dispensaries.slug_source IS 'Source of slug data: dutchie_api, manual, azdhs, discovery, etc.';
|
||||||
|
COMMENT ON COLUMN dispensaries.slug_verified_at IS 'When the slug was last verified against the source';
|
||||||
|
COMMENT ON COLUMN dispensaries.slug_status IS 'Status: unverified, verified, invalid, changed';
|
||||||
|
COMMENT ON COLUMN dispensaries.menu_url_source IS 'Source of menu_url: dutchie_api, website_scrape, manual, etc.';
|
||||||
|
COMMENT ON COLUMN dispensaries.menu_url_verified_at IS 'When the menu_url was last verified';
|
||||||
|
COMMENT ON COLUMN dispensaries.platform_id_source IS 'Source of platform_dispensary_id: dutchie_api, graphql_resolution, etc.';
|
||||||
|
COMMENT ON COLUMN dispensaries.platform_id_verified_at IS 'When the platform_dispensary_id was last verified';
|
||||||
|
COMMENT ON COLUMN dispensaries.country IS 'ISO 2-letter country code: US, CA, etc.';
|
||||||
|
|
||||||
|
-- Update Green Pharms Mesa with verified Dutchie data
|
||||||
|
UPDATE dispensaries
|
||||||
|
SET
|
||||||
|
slug = 'green-pharms-mesa',
|
||||||
|
menu_url = 'https://dutchie.com/embedded-menu/green-pharms-mesa',
|
||||||
|
menu_type = 'dutchie',
|
||||||
|
platform_dispensary_id = '68dc47a2af90f2e653f8df30',
|
||||||
|
slug_source = 'dutchie_api',
|
||||||
|
slug_verified_at = NOW(),
|
||||||
|
slug_status = 'verified',
|
||||||
|
menu_url_source = 'dutchie_api',
|
||||||
|
menu_url_verified_at = NOW(),
|
||||||
|
platform_id_source = 'dutchie_api',
|
||||||
|
platform_id_verified_at = NOW(),
|
||||||
|
updated_at = NOW()
|
||||||
|
WHERE id = 232;
|
||||||
|
|
||||||
|
-- Mark all other AZ dispensaries as needing verification
|
||||||
|
UPDATE dispensaries
|
||||||
|
SET slug_status = 'unverified'
|
||||||
|
WHERE state = 'AZ'
|
||||||
|
AND id != 232
|
||||||
|
AND (slug_status IS NULL OR slug_status = 'unverified');
|
||||||
140
backend/migrations/066_dutchie_field_alignment.sql
Normal file
140
backend/migrations/066_dutchie_field_alignment.sql
Normal file
@@ -0,0 +1,140 @@
|
|||||||
|
-- Migration 066: Align dispensaries and discovery_locations tables with Dutchie field names
|
||||||
|
-- Uses snake_case convention (Postgres standard) mapped from Dutchie's camelCase
|
||||||
|
--
|
||||||
|
-- Changes:
|
||||||
|
-- 1. dispensaries: rename address→address1, zip→zipcode, remove company_name
|
||||||
|
-- 2. dispensaries: add missing Dutchie fields
|
||||||
|
-- 3. dutchie_discovery_locations: add missing Dutchie fields
|
||||||
|
|
||||||
|
-- ============================================================================
|
||||||
|
-- DISPENSARIES TABLE
|
||||||
|
-- ============================================================================
|
||||||
|
|
||||||
|
-- Rename address to address1 (matches Dutchie's address1)
|
||||||
|
ALTER TABLE dispensaries RENAME COLUMN address TO address1;
|
||||||
|
|
||||||
|
-- Rename zip to zipcode (matches Dutchie's zip, but we use zipcode for clarity)
|
||||||
|
ALTER TABLE dispensaries RENAME COLUMN zip TO zipcode;
|
||||||
|
|
||||||
|
-- Drop company_name (redundant with name)
|
||||||
|
ALTER TABLE dispensaries DROP COLUMN IF EXISTS company_name;
|
||||||
|
|
||||||
|
-- Add address2
|
||||||
|
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS address2 VARCHAR(255);
|
||||||
|
|
||||||
|
-- Add country
|
||||||
|
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS country VARCHAR(100) DEFAULT 'United States';
|
||||||
|
|
||||||
|
-- Add timezone
|
||||||
|
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS timezone VARCHAR(50);
|
||||||
|
|
||||||
|
-- Add email
|
||||||
|
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS email VARCHAR(255);
|
||||||
|
|
||||||
|
-- Add description
|
||||||
|
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS description TEXT;
|
||||||
|
|
||||||
|
-- Add logo_image (Dutchie: logoImage)
|
||||||
|
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS logo_image TEXT;
|
||||||
|
|
||||||
|
-- Add banner_image (Dutchie: bannerImage)
|
||||||
|
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS banner_image TEXT;
|
||||||
|
|
||||||
|
-- Add offer_pickup (Dutchie: offerPickup)
|
||||||
|
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS offer_pickup BOOLEAN DEFAULT TRUE;
|
||||||
|
|
||||||
|
-- Add offer_delivery (Dutchie: offerDelivery)
|
||||||
|
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS offer_delivery BOOLEAN DEFAULT FALSE;
|
||||||
|
|
||||||
|
-- Add offer_curbside_pickup (Dutchie: offerCurbsidePickup)
|
||||||
|
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS offer_curbside_pickup BOOLEAN DEFAULT FALSE;
|
||||||
|
|
||||||
|
-- Add is_medical (Dutchie: isMedical)
|
||||||
|
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS is_medical BOOLEAN DEFAULT FALSE;
|
||||||
|
|
||||||
|
-- Add is_recreational (Dutchie: isRecreational)
|
||||||
|
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS is_recreational BOOLEAN DEFAULT FALSE;
|
||||||
|
|
||||||
|
-- Add chain_slug (Dutchie: chain)
|
||||||
|
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS chain_slug VARCHAR(255);
|
||||||
|
|
||||||
|
-- Add enterprise_id (Dutchie: retailer.enterpriseId)
|
||||||
|
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS enterprise_id VARCHAR(100);
|
||||||
|
|
||||||
|
-- Add status (Dutchie: status - open/closed)
|
||||||
|
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS status VARCHAR(50);
|
||||||
|
|
||||||
|
-- Add c_name (Dutchie: cName - the URL slug used in embedded menus)
|
||||||
|
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS c_name VARCHAR(255);
|
||||||
|
|
||||||
|
-- ============================================================================
|
||||||
|
-- DUTCHIE_DISCOVERY_LOCATIONS TABLE
|
||||||
|
-- ============================================================================
|
||||||
|
|
||||||
|
-- Add phone
|
||||||
|
ALTER TABLE dutchie_discovery_locations ADD COLUMN IF NOT EXISTS phone VARCHAR(50);
|
||||||
|
|
||||||
|
-- Add website (Dutchie: embedBackUrl)
|
||||||
|
ALTER TABLE dutchie_discovery_locations ADD COLUMN IF NOT EXISTS website TEXT;
|
||||||
|
|
||||||
|
-- Add email
|
||||||
|
ALTER TABLE dutchie_discovery_locations ADD COLUMN IF NOT EXISTS email VARCHAR(255);
|
||||||
|
|
||||||
|
-- Add description
|
||||||
|
ALTER TABLE dutchie_discovery_locations ADD COLUMN IF NOT EXISTS description TEXT;
|
||||||
|
|
||||||
|
-- Add logo_image
|
||||||
|
ALTER TABLE dutchie_discovery_locations ADD COLUMN IF NOT EXISTS logo_image TEXT;
|
||||||
|
|
||||||
|
-- Add banner_image
|
||||||
|
ALTER TABLE dutchie_discovery_locations ADD COLUMN IF NOT EXISTS banner_image TEXT;
|
||||||
|
|
||||||
|
-- Add chain_slug
|
||||||
|
ALTER TABLE dutchie_discovery_locations ADD COLUMN IF NOT EXISTS chain_slug VARCHAR(255);
|
||||||
|
|
||||||
|
-- Add enterprise_id
|
||||||
|
ALTER TABLE dutchie_discovery_locations ADD COLUMN IF NOT EXISTS enterprise_id VARCHAR(100);
|
||||||
|
|
||||||
|
-- Add c_name
|
||||||
|
ALTER TABLE dutchie_discovery_locations ADD COLUMN IF NOT EXISTS c_name VARCHAR(255);
|
||||||
|
|
||||||
|
-- Add country
|
||||||
|
ALTER TABLE dutchie_discovery_locations ADD COLUMN IF NOT EXISTS country VARCHAR(100) DEFAULT 'United States';
|
||||||
|
|
||||||
|
-- Add store status
|
||||||
|
ALTER TABLE dutchie_discovery_locations ADD COLUMN IF NOT EXISTS store_status VARCHAR(50);
|
||||||
|
|
||||||
|
-- ============================================================================
|
||||||
|
-- INDEXES
|
||||||
|
-- ============================================================================
|
||||||
|
|
||||||
|
-- Index for chain lookups
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_dispensaries_chain_slug ON dispensaries(chain_slug) WHERE chain_slug IS NOT NULL;
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_discovery_locations_chain_slug ON dutchie_discovery_locations(chain_slug) WHERE chain_slug IS NOT NULL;
|
||||||
|
|
||||||
|
-- Index for enterprise lookups (for multi-location chains)
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_dispensaries_enterprise_id ON dispensaries(enterprise_id) WHERE enterprise_id IS NOT NULL;
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_discovery_locations_enterprise_id ON dutchie_discovery_locations(enterprise_id) WHERE enterprise_id IS NOT NULL;
|
||||||
|
|
||||||
|
-- Index for c_name lookups
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_dispensaries_c_name ON dispensaries(c_name) WHERE c_name IS NOT NULL;
|
||||||
|
|
||||||
|
-- ============================================================================
|
||||||
|
-- COMMENTS
|
||||||
|
-- ============================================================================
|
||||||
|
|
||||||
|
COMMENT ON COLUMN dispensaries.address1 IS 'Street address line 1 (Dutchie: address1)';
|
||||||
|
COMMENT ON COLUMN dispensaries.address2 IS 'Street address line 2 (Dutchie: address2)';
|
||||||
|
COMMENT ON COLUMN dispensaries.zipcode IS 'ZIP/postal code (Dutchie: zip)';
|
||||||
|
COMMENT ON COLUMN dispensaries.c_name IS 'Dutchie URL slug for embedded menus (Dutchie: cName)';
|
||||||
|
COMMENT ON COLUMN dispensaries.chain_slug IS 'Chain identifier slug (Dutchie: chain)';
|
||||||
|
COMMENT ON COLUMN dispensaries.enterprise_id IS 'Parent enterprise UUID (Dutchie: retailer.enterpriseId)';
|
||||||
|
COMMENT ON COLUMN dispensaries.logo_image IS 'Logo image URL (Dutchie: logoImage)';
|
||||||
|
COMMENT ON COLUMN dispensaries.banner_image IS 'Banner image URL (Dutchie: bannerImage)';
|
||||||
|
COMMENT ON COLUMN dispensaries.offer_pickup IS 'Offers in-store pickup (Dutchie: offerPickup)';
|
||||||
|
COMMENT ON COLUMN dispensaries.offer_delivery IS 'Offers delivery (Dutchie: offerDelivery)';
|
||||||
|
COMMENT ON COLUMN dispensaries.offer_curbside_pickup IS 'Offers curbside pickup (Dutchie: offerCurbsidePickup)';
|
||||||
|
COMMENT ON COLUMN dispensaries.is_medical IS 'Licensed for medical sales (Dutchie: isMedical)';
|
||||||
|
COMMENT ON COLUMN dispensaries.is_recreational IS 'Licensed for recreational sales (Dutchie: isRecreational)';
|
||||||
|
|
||||||
|
SELECT 'Migration 066 completed: Dutchie field alignment' as status;
|
||||||
24
backend/migrations/067_promotion_log.sql
Normal file
24
backend/migrations/067_promotion_log.sql
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
-- Promotion log table for tracking discovery → dispensary promotions
|
||||||
|
-- Tracks validation and promotion actions for audit/review
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS dutchie_promotion_log (
|
||||||
|
id SERIAL PRIMARY KEY,
|
||||||
|
discovery_id INTEGER REFERENCES dutchie_discovery_locations(id) ON DELETE SET NULL,
|
||||||
|
dispensary_id INTEGER REFERENCES dispensaries(id) ON DELETE SET NULL,
|
||||||
|
action VARCHAR(50) NOT NULL, -- 'validated', 'rejected', 'promoted_create', 'promoted_update', 'skipped'
|
||||||
|
state_code VARCHAR(10),
|
||||||
|
store_name VARCHAR(255),
|
||||||
|
validation_errors TEXT[], -- Array of error messages if rejected
|
||||||
|
field_changes JSONB, -- Before/after snapshot of changed fields
|
||||||
|
triggered_by VARCHAR(100) DEFAULT 'auto', -- 'auto', 'manual', 'api'
|
||||||
|
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Indexes for efficient querying
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_promotion_log_discovery_id ON dutchie_promotion_log(discovery_id);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_promotion_log_dispensary_id ON dutchie_promotion_log(dispensary_id);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_promotion_log_action ON dutchie_promotion_log(action);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_promotion_log_state_code ON dutchie_promotion_log(state_code);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_promotion_log_created_at ON dutchie_promotion_log(created_at DESC);
|
||||||
|
|
||||||
|
COMMENT ON TABLE dutchie_promotion_log IS 'Audit log for discovery location validation and promotion to dispensaries';
|
||||||
95
backend/migrations/068_crawler_status_alerts.sql
Normal file
95
backend/migrations/068_crawler_status_alerts.sql
Normal file
@@ -0,0 +1,95 @@
|
|||||||
|
-- Migration 068: Crawler Status Alerts
-- Creates status_alerts table for dashboard notifications and status change logging

-- ============================================================
-- STATUS ALERTS TABLE
-- ============================================================

CREATE TABLE IF NOT EXISTS crawler_status_alerts (
    id SERIAL PRIMARY KEY,

    -- References
    dispensary_id INTEGER REFERENCES dispensaries(id),
    profile_id INTEGER REFERENCES dispensary_crawler_profiles(id),

    -- Alert info
    alert_type VARCHAR(50) NOT NULL, -- 'status_change', 'crawl_error', 'validation_failed', 'promoted', 'demoted'
    severity VARCHAR(20) DEFAULT 'info', -- 'info', 'warning', 'error', 'critical'

    -- Status transition
    previous_status VARCHAR(50),
    new_status VARCHAR(50),

    -- Context
    message TEXT,
    error_details JSONB,
    metadata JSONB, -- Additional context (product counts, error codes, etc.)

    -- Tracking
    acknowledged BOOLEAN DEFAULT FALSE,
    acknowledged_at TIMESTAMP WITH TIME ZONE,
    acknowledged_by VARCHAR(100),

    -- Timestamps
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
);

-- Indexes for common queries
CREATE INDEX IF NOT EXISTS idx_crawler_status_alerts_dispensary ON crawler_status_alerts(dispensary_id);
CREATE INDEX IF NOT EXISTS idx_crawler_status_alerts_type ON crawler_status_alerts(alert_type);
CREATE INDEX IF NOT EXISTS idx_crawler_status_alerts_severity ON crawler_status_alerts(severity);
-- Partial index for the "unacknowledged alerts, newest first" dashboard query.
-- Keying on created_at (rather than on the boolean, which the WHERE predicate
-- already fixes to a constant and so provides no selectivity) lets the planner
-- satisfy both the filter and the ORDER BY from this index.
CREATE INDEX IF NOT EXISTS idx_crawler_status_alerts_unack ON crawler_status_alerts(created_at DESC) WHERE acknowledged = FALSE;
CREATE INDEX IF NOT EXISTS idx_crawler_status_alerts_created ON crawler_status_alerts(created_at DESC);

-- ============================================================
-- STATUS DEFINITIONS (for reference/validation)
-- ============================================================

COMMENT ON TABLE crawler_status_alerts IS 'Crawler status change notifications for dashboard alerting';
COMMENT ON COLUMN crawler_status_alerts.alert_type IS 'Type: status_change, crawl_error, validation_failed, promoted, demoted';
COMMENT ON COLUMN crawler_status_alerts.severity IS 'Severity: info, warning, error, critical';
COMMENT ON COLUMN crawler_status_alerts.previous_status IS 'Previous crawler status before change';
COMMENT ON COLUMN crawler_status_alerts.new_status IS 'New crawler status after change';
|
||||||
|
|
||||||
|
-- ============================================================
-- STATUS TRACKING ON PROFILES
-- ============================================================

-- Add status-tracking columns to crawler profiles, idempotently.
DO $$
BEGIN
    -- Consecutive success count for auto-promotion
    IF NOT EXISTS (SELECT 1 FROM information_schema.columns
                   WHERE table_name = 'dispensary_crawler_profiles' AND column_name = 'consecutive_successes') THEN
        ALTER TABLE dispensary_crawler_profiles ADD COLUMN consecutive_successes INTEGER DEFAULT 0;
    END IF;

    -- Consecutive failure count for auto-demotion
    IF NOT EXISTS (SELECT 1 FROM information_schema.columns
                   WHERE table_name = 'dispensary_crawler_profiles' AND column_name = 'consecutive_failures') THEN
        ALTER TABLE dispensary_crawler_profiles ADD COLUMN consecutive_failures INTEGER DEFAULT 0;
    END IF;

    -- Last status change timestamp
    IF NOT EXISTS (SELECT 1 FROM information_schema.columns
                   WHERE table_name = 'dispensary_crawler_profiles' AND column_name = 'status_changed_at') THEN
        ALTER TABLE dispensary_crawler_profiles ADD COLUMN status_changed_at TIMESTAMP WITH TIME ZONE;
    END IF;

    -- Status change reason
    IF NOT EXISTS (SELECT 1 FROM information_schema.columns
                   WHERE table_name = 'dispensary_crawler_profiles' AND column_name = 'status_reason') THEN
        ALTER TABLE dispensary_crawler_profiles ADD COLUMN status_reason TEXT;
    END IF;
END $$;

-- ============================================================
-- VALID STATUS VALUES
-- ============================================================
-- Status values for dispensary_crawler_profiles.status:
--   'sandbox'      - Newly created, being validated
--   'production'   - Healthy, actively crawled
--   'needs_manual' - Requires human intervention
--   'failing'      - Multiple consecutive failures
--   'disabled'     - Manually disabled
--   'legacy'       - No profile, uses default method (virtual status)
|
||||||
163
backend/migrations/069_six_stage_status.sql
Normal file
163
backend/migrations/069_six_stage_status.sql
Normal file
@@ -0,0 +1,163 @@
|
|||||||
|
-- Migration 069: Seven-Stage Status System
--
-- Implements explicit 7-stage pipeline for store lifecycle:
--   1. discovered - Found via Dutchie API, raw data
--   2. validated  - Passed field checks, ready for promotion
--   3. promoted   - In dispensaries table, has crawler profile
--   4. sandbox    - First crawl attempted, testing
--   5. hydrating  - Products are being loaded/updated
--   6. production - Healthy, scheduled crawls via Horizon
--   7. failing    - Crawl errors, needs attention

-- ============================================================
-- STAGE ENUM TYPE
-- ============================================================

-- NOTE(review): this enum type is never applied in this migration — the
-- stage columns added below are VARCHAR(20). Confirm whether anything else
-- depends on store_stage before relying on it for validation.
DO $$
BEGIN
    -- Create enum if not exists
    IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'store_stage') THEN
        CREATE TYPE store_stage AS ENUM (
            'discovered',
            'validated',
            'promoted',
            'sandbox',
            'hydrating',
            'production',
            'failing'
        );
    END IF;
END $$;
|
||||||
|
|
||||||
|
-- ============================================================
-- UPDATE DISCOVERY LOCATIONS TABLE
-- ============================================================

-- Add stage column to discovery locations (replaces status).
-- ADD COLUMN ... DEFAULT backfills every existing row with 'discovered'.
DO $$
BEGIN
    IF NOT EXISTS (SELECT 1 FROM information_schema.columns
                   WHERE table_name = 'dutchie_discovery_locations' AND column_name = 'stage') THEN
        ALTER TABLE dutchie_discovery_locations ADD COLUMN stage VARCHAR(20) DEFAULT 'discovered';
    END IF;
END $$;
|
||||||
|
|
||||||
|
-- Migrate existing status values to stage.
--
-- FIX: the stage column above is added with DEFAULT 'discovered', and
-- ALTER TABLE ... ADD COLUMN ... DEFAULT backfills existing rows with that
-- default. A guard of only "stage IS NULL OR stage = ''" therefore matched
-- nothing and the status → stage mapping was silently skipped. Including the
-- default value picks up rows that have not yet progressed; the mapping is
-- a pure function of the static status column, so re-running it on
-- 'discovered' rows is idempotent.
UPDATE dutchie_discovery_locations
SET stage = CASE
    WHEN status = 'discovered' THEN 'discovered'
    WHEN status = 'verified' THEN 'validated'
    WHEN status = 'rejected' THEN 'failing'
    WHEN status = 'merged' THEN 'validated'
    ELSE 'discovered'
END
WHERE stage IS NULL OR stage IN ('', 'discovered');
|
||||||
|
|
||||||
|
-- ============================================================
-- UPDATE CRAWLER PROFILES TABLE
-- ============================================================

-- Normalize status values to the new vocabulary.
--
-- FIX: the original UPDATE had no WHERE clause, rewriting every row
-- (including the no-op sandbox/production/failing mappings) and generating
-- dead tuples on each run. Restricting to rows whose value actually changes
-- produces the identical final state with less churn.
-- NOTE(review): 'needs_manual' and 'disabled' both collapse into 'failing',
-- losing that distinction — confirm this is intended.
UPDATE dispensary_crawler_profiles
SET status = CASE
    WHEN status = 'needs_manual' THEN 'failing'
    WHEN status = 'disabled' THEN 'failing'
    ELSE 'promoted' -- covers NULL and any unrecognized legacy value
END
WHERE status IS NULL
   OR status NOT IN ('sandbox', 'production', 'failing');
|
||||||
|
|
||||||
|
-- ============================================================
-- ADD STAGE TRACKING TO DISPENSARIES
-- ============================================================

DO $$
BEGIN
    -- Add stage column to dispensaries for quick filtering
    IF NOT EXISTS (SELECT 1 FROM information_schema.columns
                   WHERE table_name = 'dispensaries' AND column_name = 'stage') THEN
        ALTER TABLE dispensaries ADD COLUMN stage VARCHAR(20) DEFAULT 'promoted';
    END IF;

    -- Add stage_changed_at for tracking
    IF NOT EXISTS (SELECT 1 FROM information_schema.columns
                   WHERE table_name = 'dispensaries' AND column_name = 'stage_changed_at') THEN
        ALTER TABLE dispensaries ADD COLUMN stage_changed_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP;
    END IF;

    -- Add first_crawl_at to track sandbox → production transition
    IF NOT EXISTS (SELECT 1 FROM information_schema.columns
                   WHERE table_name = 'dispensaries' AND column_name = 'first_crawl_at') THEN
        ALTER TABLE dispensaries ADD COLUMN first_crawl_at TIMESTAMP WITH TIME ZONE;
    END IF;

    -- Add last_successful_crawl_at
    IF NOT EXISTS (SELECT 1 FROM information_schema.columns
                   WHERE table_name = 'dispensaries' AND column_name = 'last_successful_crawl_at') THEN
        ALTER TABLE dispensaries ADD COLUMN last_successful_crawl_at TIMESTAMP WITH TIME ZONE;
    END IF;
END $$;
|
||||||
|
|
||||||
|
-- Set initial stage for existing dispensaries based on their crawler profile status.
--
-- FIX: stage was added above with DEFAULT 'promoted', and ADD COLUMN ...
-- DEFAULT backfills existing rows with that default, so the original guard
-- "stage IS NULL OR stage = ''" matched nothing and the profile-derived
-- stage was never applied. Including the default value lets unmapped rows
-- actually pick up their profile status.
UPDATE dispensaries d
SET stage = COALESCE(
    (SELECT dcp.status FROM dispensary_crawler_profiles dcp
     WHERE dcp.dispensary_id = d.id AND dcp.enabled = true
     ORDER BY dcp.updated_at DESC LIMIT 1),
    'promoted'
)
WHERE d.stage IS NULL OR d.stage IN ('', 'promoted');
|
||||||
|
|
||||||
|
-- ============================================================
-- INDEXES FOR STAGE-BASED QUERIES
-- ============================================================

CREATE INDEX IF NOT EXISTS idx_dispensaries_stage ON dispensaries(stage);
-- NOTE(review): the composite below also serves stage-only scans via its
-- leading column, making the single-column index largely redundant.
CREATE INDEX IF NOT EXISTS idx_dispensaries_stage_state ON dispensaries(stage, state);
CREATE INDEX IF NOT EXISTS idx_discovery_locations_stage ON dutchie_discovery_locations(stage);
CREATE INDEX IF NOT EXISTS idx_crawler_profiles_status ON dispensary_crawler_profiles(status);
|
||||||
|
|
||||||
|
-- ============================================================
-- STAGE TRANSITION LOG
-- ============================================================

CREATE TABLE IF NOT EXISTS stage_transitions (
    id SERIAL PRIMARY KEY,

    -- What changed
    entity_type VARCHAR(20) NOT NULL, -- 'discovery_location' or 'dispensary'
    entity_id INTEGER NOT NULL,

    -- Stage change
    from_stage VARCHAR(20),
    to_stage VARCHAR(20) NOT NULL,

    -- Context
    trigger_type VARCHAR(50) NOT NULL, -- 'api', 'scheduler', 'manual', 'auto'
    trigger_endpoint VARCHAR(200),

    -- Outcome
    success BOOLEAN DEFAULT TRUE,
    error_message TEXT,
    metadata JSONB,

    -- Timing
    duration_ms INTEGER,
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
);

CREATE INDEX IF NOT EXISTS idx_stage_transitions_entity ON stage_transitions(entity_type, entity_id);
CREATE INDEX IF NOT EXISTS idx_stage_transitions_to_stage ON stage_transitions(to_stage);
CREATE INDEX IF NOT EXISTS idx_stage_transitions_created ON stage_transitions(created_at DESC);

-- ============================================================
-- COMMENTS
-- ============================================================

COMMENT ON TABLE stage_transitions IS 'Audit log for all stage transitions in the pipeline';
-- FIX: the stage list previously omitted 'hydrating' even though this
-- migration's header defines seven stages including it.
COMMENT ON COLUMN dispensaries.stage IS 'Current pipeline stage: discovered, validated, promoted, sandbox, hydrating, production, failing';
COMMENT ON COLUMN dispensaries.stage_changed_at IS 'When the stage was last changed';
COMMENT ON COLUMN dispensaries.first_crawl_at IS 'When the first crawl was attempted (sandbox stage)';
COMMENT ON COLUMN dispensaries.last_successful_crawl_at IS 'When the last successful crawl completed';
|
||||||
239
backend/migrations/070_product_variants.sql
Normal file
239
backend/migrations/070_product_variants.sql
Normal file
@@ -0,0 +1,239 @@
|
|||||||
|
-- ============================================================================
-- Migration 070: Product Variants Tables
-- ============================================================================
--
-- Purpose: Store variant-level pricing and inventory as first-class entities
-- to enable time-series analytics, price comparisons, and sale tracking.
--
-- Enables queries like:
--   - Price history for a specific variant (1g Blue Dream over time)
--   - Sale frequency analysis (how often is this on special?)
--   - Cross-store price comparison (who has cheapest 1g flower?)
--   - Current specials across all stores
--
-- RULES:
--   - STRICTLY ADDITIVE (no DROP, DELETE, TRUNCATE)
--   - All new tables use IF NOT EXISTS
--   - All indexes use IF NOT EXISTS
--
-- ============================================================================

-- ============================================================================
-- SECTION 1: PRODUCT_VARIANTS TABLE (Current State)
-- ============================================================================
-- One row per product+option combination. Tracks current pricing/inventory.

CREATE TABLE IF NOT EXISTS product_variants (
    id SERIAL PRIMARY KEY,
    store_product_id INTEGER NOT NULL REFERENCES store_products(id) ON DELETE CASCADE,
    dispensary_id INTEGER NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE,

    -- Variant identity (from Dutchie POSMetaData.children)
    option VARCHAR(100) NOT NULL, -- "1g", "3.5g", "1/8oz", "100mg"
    canonical_sku VARCHAR(100), -- Dutchie canonicalSKU
    canonical_id VARCHAR(100), -- Dutchie canonicalID
    canonical_name VARCHAR(500), -- Dutchie canonicalName

    -- Current pricing (in dollars, not cents)
    price_rec NUMERIC(10,2),
    price_med NUMERIC(10,2),
    price_rec_special NUMERIC(10,2),
    price_med_special NUMERIC(10,2),

    -- Current inventory
    quantity INTEGER,
    quantity_available INTEGER,
    in_stock BOOLEAN DEFAULT TRUE,

    -- Special/sale status
    is_on_special BOOLEAN DEFAULT FALSE,

    -- Weight/size parsing (for analytics)
    weight_value NUMERIC(10,2), -- 1, 3.5, 28, etc.
    weight_unit VARCHAR(20), -- g, oz, mg, ml, etc.

    -- Timestamps
    first_seen_at TIMESTAMPTZ DEFAULT NOW(),
    last_seen_at TIMESTAMPTZ DEFAULT NOW(),
    last_price_change_at TIMESTAMPTZ,
    last_stock_change_at TIMESTAMPTZ,

    created_at TIMESTAMPTZ DEFAULT NOW(),
    updated_at TIMESTAMPTZ DEFAULT NOW(),

    UNIQUE(store_product_id, option)
);

-- Indexes for common queries
CREATE INDEX IF NOT EXISTS idx_variants_store_product ON product_variants(store_product_id);
CREATE INDEX IF NOT EXISTS idx_variants_dispensary ON product_variants(dispensary_id);
CREATE INDEX IF NOT EXISTS idx_variants_option ON product_variants(option);
CREATE INDEX IF NOT EXISTS idx_variants_in_stock ON product_variants(dispensary_id, in_stock) WHERE in_stock = TRUE;
CREATE INDEX IF NOT EXISTS idx_variants_on_special ON product_variants(dispensary_id, is_on_special) WHERE is_on_special = TRUE;
CREATE INDEX IF NOT EXISTS idx_variants_canonical_sku ON product_variants(canonical_sku) WHERE canonical_sku IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_variants_price_rec ON product_variants(price_rec) WHERE price_rec IS NOT NULL;

COMMENT ON TABLE product_variants IS 'Current state of each product variant (weight/size option). One row per product+option.';
COMMENT ON COLUMN product_variants.option IS 'Weight/size option string from Dutchie (e.g., "1g", "3.5g", "1/8oz")';
COMMENT ON COLUMN product_variants.canonical_sku IS 'Dutchie POS SKU for cross-store matching';
|
||||||
|
|
||||||
|
|
||||||
|
-- ============================================================================
-- SECTION 2: PRODUCT_VARIANT_SNAPSHOTS TABLE (Historical Data)
-- ============================================================================
-- Time-series data for variant pricing. One row per variant per crawl.
-- CRITICAL: NEVER DELETE from this table.

CREATE TABLE IF NOT EXISTS product_variant_snapshots (
    id SERIAL PRIMARY KEY,
    product_variant_id INTEGER NOT NULL REFERENCES product_variants(id) ON DELETE CASCADE,
    store_product_id INTEGER REFERENCES store_products(id) ON DELETE SET NULL,
    dispensary_id INTEGER NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE,
    crawl_run_id INTEGER REFERENCES crawl_runs(id) ON DELETE SET NULL,

    -- Variant identity (denormalized for query performance)
    option VARCHAR(100) NOT NULL,

    -- Pricing at time of capture
    price_rec NUMERIC(10,2),
    price_med NUMERIC(10,2),
    price_rec_special NUMERIC(10,2),
    price_med_special NUMERIC(10,2),

    -- Inventory at time of capture
    quantity INTEGER,
    in_stock BOOLEAN DEFAULT TRUE,

    -- Special status at time of capture
    is_on_special BOOLEAN DEFAULT FALSE,

    -- Feed presence (FALSE = variant missing from crawl)
    is_present_in_feed BOOLEAN DEFAULT TRUE,

    -- Capture timestamp
    captured_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),

    created_at TIMESTAMPTZ DEFAULT NOW()
);

-- Indexes for time-series queries
CREATE INDEX IF NOT EXISTS idx_variant_snapshots_variant ON product_variant_snapshots(product_variant_id, captured_at DESC);
CREATE INDEX IF NOT EXISTS idx_variant_snapshots_dispensary ON product_variant_snapshots(dispensary_id, captured_at DESC);
CREATE INDEX IF NOT EXISTS idx_variant_snapshots_crawl ON product_variant_snapshots(crawl_run_id) WHERE crawl_run_id IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_variant_snapshots_captured ON product_variant_snapshots(captured_at DESC);
CREATE INDEX IF NOT EXISTS idx_variant_snapshots_special ON product_variant_snapshots(is_on_special, captured_at DESC) WHERE is_on_special = TRUE;
CREATE INDEX IF NOT EXISTS idx_variant_snapshots_option ON product_variant_snapshots(option, captured_at DESC);

COMMENT ON TABLE product_variant_snapshots IS 'Historical variant pricing/inventory. One row per variant per crawl. NEVER DELETE.';
|
||||||
|
|
||||||
|
|
||||||
|
-- ============================================================================
-- SECTION 3: USEFUL VIEWS
-- ============================================================================

-- View: Current specials across all stores
CREATE OR REPLACE VIEW v_current_specials AS
SELECT
    pv.id AS variant_id,
    sp.id AS product_id,
    sp.name_raw AS product_name,
    sp.brand_name_raw AS brand_name,
    sp.category_raw AS category,
    d.id AS dispensary_id,
    d.name AS dispensary_name,
    d.city,
    d.state,
    pv.option,
    pv.price_rec,
    pv.price_rec_special,
    -- NULLIF guards division by zero for a zero regular price
    ROUND(((pv.price_rec - pv.price_rec_special) / NULLIF(pv.price_rec, 0)) * 100, 1) AS discount_percent,
    pv.quantity,
    pv.in_stock,
    pv.last_seen_at
FROM product_variants pv
JOIN store_products sp ON sp.id = pv.store_product_id
JOIN dispensaries d ON d.id = pv.dispensary_id
WHERE pv.is_on_special = TRUE
  AND pv.in_stock = TRUE
  AND pv.price_rec_special IS NOT NULL
  AND pv.price_rec_special < pv.price_rec;

COMMENT ON VIEW v_current_specials IS 'All products currently on special across all stores';

-- View: Price comparison for a product across stores
CREATE OR REPLACE VIEW v_price_comparison AS
SELECT
    sp.name_raw AS product_name,
    sp.brand_name_raw AS brand_name,
    sp.category_raw AS category,
    pv.option,
    d.id AS dispensary_id,
    d.name AS dispensary_name,
    d.city,
    pv.price_rec,
    pv.price_rec_special,
    pv.is_on_special,
    pv.in_stock,
    pv.quantity,
    -- Rank by effective price (special price when present)
    RANK() OVER (PARTITION BY sp.name_raw, pv.option ORDER BY COALESCE(pv.price_rec_special, pv.price_rec) ASC) AS price_rank
FROM product_variants pv
JOIN store_products sp ON sp.id = pv.store_product_id
JOIN dispensaries d ON d.id = pv.dispensary_id
WHERE pv.in_stock = TRUE
  AND (pv.price_rec IS NOT NULL OR pv.price_rec_special IS NOT NULL);

COMMENT ON VIEW v_price_comparison IS 'Compare prices for same product across stores, ranked by price';

-- View: Latest snapshot per variant.
-- FIX: previously SELECTed pvs.* — an explicit column list keeps the view's
-- contract stable if columns are later added to the snapshots table, and
-- avoids the SELECT-*-in-view antipattern.
CREATE OR REPLACE VIEW v_latest_variant_snapshots AS
SELECT DISTINCT ON (pvs.product_variant_id)
    pvs.id,
    pvs.product_variant_id,
    pvs.store_product_id,
    pvs.dispensary_id,
    pvs.crawl_run_id,
    pvs.option,
    pvs.price_rec,
    pvs.price_med,
    pvs.price_rec_special,
    pvs.price_med_special,
    pvs.quantity,
    pvs.in_stock,
    pvs.is_on_special,
    pvs.is_present_in_feed,
    pvs.captured_at,
    pvs.created_at
FROM product_variant_snapshots pvs
ORDER BY pvs.product_variant_id, pvs.captured_at DESC;
|
||||||
|
|
||||||
|
|
||||||
|
-- ============================================================================
-- SECTION 4: HELPER FUNCTION FOR SALE FREQUENCY
-- ============================================================================

-- Calculate sale frequency and price stats for one variant over the last
-- p_days days.
-- Marked STABLE: the function only reads table data and NOW(), performs no
-- writes, so the planner may treat results as constant within a statement.
CREATE OR REPLACE FUNCTION get_variant_sale_stats(p_variant_id INTEGER, p_days INTEGER DEFAULT 30)
RETURNS TABLE (
    total_snapshots BIGINT,
    times_on_special BIGINT,
    special_frequency_pct NUMERIC,
    avg_discount_pct NUMERIC,
    min_price NUMERIC,
    max_price NUMERIC,
    avg_price NUMERIC
) AS $$
BEGIN
    RETURN QUERY
    SELECT
        COUNT(*)::BIGINT AS total_snapshots,
        COUNT(*) FILTER (WHERE is_on_special)::BIGINT AS times_on_special,
        -- NULLIF guards division by zero when there are no snapshots
        ROUND((COUNT(*) FILTER (WHERE is_on_special)::NUMERIC / NULLIF(COUNT(*), 0)) * 100, 1) AS special_frequency_pct,
        -- Average discount only over snapshots where both prices are known
        ROUND(AVG(
            CASE WHEN is_on_special AND price_rec_special IS NOT NULL AND price_rec IS NOT NULL
                 THEN ((price_rec - price_rec_special) / NULLIF(price_rec, 0)) * 100
            END
        ), 1) AS avg_discount_pct,
        MIN(COALESCE(price_rec_special, price_rec)) AS min_price,
        MAX(price_rec) AS max_price,
        ROUND(AVG(COALESCE(price_rec_special, price_rec)), 2) AS avg_price
    FROM product_variant_snapshots
    WHERE product_variant_id = p_variant_id
      -- make_interval is cleaner and safer than string-concat interval building
      AND captured_at >= NOW() - make_interval(days => p_days);
END;
$$ LANGUAGE plpgsql STABLE;

COMMENT ON FUNCTION get_variant_sale_stats IS 'Get sale frequency and price stats for a variant over N days';

-- ============================================================================
-- DONE
-- ============================================================================

SELECT 'Migration 070 completed. Product variants tables ready for time-series analytics.' AS status;
|
||||||
53
backend/migrations/071_harmonize_store_products.sql
Normal file
53
backend/migrations/071_harmonize_store_products.sql
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
-- Migration 071: Harmonize store_products with dutchie_products
-- Adds missing columns to store_products to consolidate on a single canonical table

-- Product details
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS description TEXT;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS weight VARCHAR(50);
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS weights JSONB;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS measurements JSONB;

-- Cannabinoid/terpene data
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS effects JSONB;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS terpenes JSONB;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS cannabinoids_v2 JSONB;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS thc_content NUMERIC(10,4);
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS cbd_content NUMERIC(10,4);

-- Images
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS images JSONB;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS primary_image_url TEXT;

-- Inventory
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS total_quantity_available INTEGER DEFAULT 0;

-- Status/flags
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS status VARCHAR(50);
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS featured BOOLEAN DEFAULT FALSE;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS coming_soon BOOLEAN DEFAULT FALSE;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS visibility_lost BOOLEAN DEFAULT FALSE;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS visibility_lost_at TIMESTAMP WITH TIME ZONE;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS visibility_restored_at TIMESTAMP WITH TIME ZONE;

-- Threshold flags (Dutchie-specific)
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS is_below_threshold BOOLEAN DEFAULT FALSE;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS is_below_kiosk_threshold BOOLEAN DEFAULT FALSE;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS options_below_threshold BOOLEAN DEFAULT FALSE;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS options_below_kiosk_threshold BOOLEAN DEFAULT FALSE;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS certificate_of_analysis_enabled BOOLEAN DEFAULT FALSE;

-- Platform metadata
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS external_product_id VARCHAR(100);
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS c_name VARCHAR(500);
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS past_c_names TEXT[];
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS latest_raw_payload JSONB;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS created_at_platform TIMESTAMP WITH TIME ZONE;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS updated_at_platform TIMESTAMP WITH TIME ZONE;

-- Indexes for common queries
CREATE INDEX IF NOT EXISTS idx_store_products_external_id ON store_products(external_product_id);
CREATE INDEX IF NOT EXISTS idx_store_products_visibility_lost ON store_products(visibility_lost) WHERE visibility_lost = TRUE;
CREATE INDEX IF NOT EXISTS idx_store_products_status ON store_products(status);

-- Add comment
COMMENT ON TABLE store_products IS 'Canonical product table - consolidated from dutchie_products';
|
||||||
74
backend/migrations/072_product_views.sql
Normal file
74
backend/migrations/072_product_views.sql
Normal file
@@ -0,0 +1,74 @@
|
|||||||
|
-- Migration 072: Create compatibility views for store_products and store_product_snapshots
-- These views provide backward-compatible column names for API routes

-- v_products view - aliases store_products columns to match legacy dutchie_products naming
CREATE OR REPLACE VIEW v_products AS
SELECT
    id,
    dispensary_id,
    provider_product_id AS external_product_id,
    provider_product_id AS dutchie_id,
    name_raw AS name,
    brand_name_raw AS brand_name,
    category_raw AS type,
    subcategory_raw AS subcategory,
    strain_type,
    thc_percent AS thc,
    cbd_percent AS cbd,
    stock_status,
    is_in_stock,
    stock_quantity,
    image_url,
    primary_image_url,
    images,
    effects,
    description,
    is_on_special,
    featured,
    medical_only,
    rec_only,
    -- NOTE(review): the physical external_product_id column is exposed as
    -- external_id while provider_product_id masquerades as external_product_id
    -- above — confirm consumers expect this shadowing.
    external_product_id AS external_id,
    provider,
    created_at,
    updated_at
FROM store_products;

-- v_product_snapshots view - aliases store_product_snapshots columns to match legacy naming
CREATE OR REPLACE VIEW v_product_snapshots AS
SELECT
    id,
    store_product_id,
    dispensary_id,
    provider,
    provider_product_id,
    crawl_run_id,
    captured_at AS crawled_at,
    name_raw,
    brand_name_raw,
    category_raw,
    subcategory_raw,
    -- Convert prices (dollars) to *_cents columns. NULL inputs propagate
    -- through the arithmetic and cast, so no CASE guard is needed.
    (price_rec * 100)::integer AS rec_min_price_cents,
    (price_rec * 100)::integer AS rec_max_price_cents,
    (price_rec_special * 100)::integer AS rec_min_special_price_cents,
    (price_med * 100)::integer AS med_min_price_cents,
    (price_med * 100)::integer AS med_max_price_cents,
    (price_med_special * 100)::integer AS med_min_special_price_cents,
    is_on_special AS special,
    discount_percent,
    is_in_stock,
    stock_quantity,
    stock_status,
    stock_quantity AS total_quantity_available,
    thc_percent,
    cbd_percent,
    image_url,
    raw_data AS options,
    created_at
FROM store_product_snapshots;

-- Add indexes for the views' underlying tables
CREATE INDEX IF NOT EXISTS idx_store_products_dispensary ON store_products(dispensary_id);
CREATE INDEX IF NOT EXISTS idx_store_products_stock ON store_products(stock_status);
CREATE INDEX IF NOT EXISTS idx_store_snapshots_product ON store_product_snapshots(store_product_id);
CREATE INDEX IF NOT EXISTS idx_store_snapshots_captured ON store_product_snapshots(captured_at DESC);
|
||||||
12
backend/migrations/073_proxy_timezone.sql
Normal file
12
backend/migrations/073_proxy_timezone.sql
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
-- Add timezone column to proxies table for geo-consistent fingerprinting
-- This allows matching Accept-Language and other headers to proxy location

-- Additive and idempotent: safe to re-run.
ALTER TABLE proxies
ADD COLUMN IF NOT EXISTS timezone VARCHAR(50);

-- Add timezone to failed_proxies as well
-- (keeps the failed-proxy archive schema-compatible with proxies)
ALTER TABLE failed_proxies
ADD COLUMN IF NOT EXISTS timezone VARCHAR(50);

-- Comment explaining usage
COMMENT ON COLUMN proxies.timezone IS 'IANA timezone (e.g., America/Phoenix) for geo-consistent fingerprinting';
|
||||||
27
backend/migrations/074_worker_commands.sql
Normal file
27
backend/migrations/074_worker_commands.sql
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
-- Migration: Worker Commands Table
-- Purpose: Store commands for workers (decommission, etc.)
-- Workers poll this table after each task to check for commands
--
-- NOTE(review): this file and 074_worker_task_queue.sql share the number 074.
-- The ALTER TABLE statements below require worker_registry to exist already —
-- confirm migration ordering runs the task-queue file first.

CREATE TABLE IF NOT EXISTS worker_commands (
    id SERIAL PRIMARY KEY,
    worker_id TEXT NOT NULL,
    command TEXT NOT NULL,
    reason TEXT,
    issued_by TEXT,
    issued_at TIMESTAMPTZ DEFAULT NOW(),
    acknowledged_at TIMESTAMPTZ,
    executed_at TIMESTAMPTZ,
    status TEXT DEFAULT 'pending',

    -- The original version documented the allowed values only in comments;
    -- named CHECK constraints enforce them and are greppable in error messages.
    CONSTRAINT worker_commands_command_check
        CHECK (command IN ('decommission', 'pause', 'resume')),
    CONSTRAINT worker_commands_status_check
        CHECK (status IN ('pending', 'acknowledged', 'executed', 'cancelled'))
);

-- Index for worker lookups
CREATE INDEX IF NOT EXISTS idx_worker_commands_worker_id ON worker_commands(worker_id);
-- Partial index: the hot path is "any pending command for this worker?"
CREATE INDEX IF NOT EXISTS idx_worker_commands_pending ON worker_commands(worker_id, status) WHERE status = 'pending';

-- Add decommission_requested column to worker_registry for quick checks
ALTER TABLE worker_registry ADD COLUMN IF NOT EXISTS decommission_requested BOOLEAN DEFAULT FALSE;
ALTER TABLE worker_registry ADD COLUMN IF NOT EXISTS decommission_reason TEXT;
ALTER TABLE worker_registry ADD COLUMN IF NOT EXISTS decommission_requested_at TIMESTAMPTZ;

-- Comment
COMMENT ON TABLE worker_commands IS 'Commands issued to workers (decommission after task, pause, etc.)';
|
||||||
322
backend/migrations/074_worker_task_queue.sql
Normal file
322
backend/migrations/074_worker_task_queue.sql
Normal file
@@ -0,0 +1,322 @@
|
|||||||
|
-- Migration 074: Worker Task Queue System
-- Implements role-based task queue with per-store locking and capacity tracking

-- Task queue table
CREATE TABLE IF NOT EXISTS worker_tasks (
    id SERIAL PRIMARY KEY,

    -- Task identification
    role VARCHAR(50) NOT NULL, -- store_discovery, entry_point_discovery, product_discovery, product_resync, analytics_refresh
    dispensary_id INTEGER REFERENCES dispensaries(id) ON DELETE CASCADE,
    platform VARCHAR(20), -- dutchie, jane, treez, etc.

    -- Task state
    status VARCHAR(20) NOT NULL DEFAULT 'pending',
    priority INTEGER DEFAULT 0, -- Higher = more urgent

    -- Scheduling
    scheduled_for TIMESTAMPTZ, -- For batch scheduling (e.g., every 4 hours)

    -- Ownership
    worker_id VARCHAR(100), -- Pod name or worker ID
    claimed_at TIMESTAMPTZ,
    started_at TIMESTAMPTZ,
    completed_at TIMESTAMPTZ,
    last_heartbeat_at TIMESTAMPTZ, -- workers must touch this; recover_stale_tasks keys off it

    -- Results
    result JSONB, -- Task output data
    error_message TEXT,
    retry_count INTEGER DEFAULT 0,
    max_retries INTEGER DEFAULT 3,

    -- Metadata
    created_at TIMESTAMPTZ DEFAULT NOW(),
    updated_at TIMESTAMPTZ DEFAULT NOW(), -- maintained by the worker_tasks_updated_at trigger below

    -- Constraints
    CONSTRAINT valid_status CHECK (status IN ('pending', 'claimed', 'running', 'completed', 'failed', 'stale'))
);

-- Indexes for efficient task claiming
-- Matches claim_task()'s ORDER BY priority DESC, created_at ASC over pending rows.
CREATE INDEX IF NOT EXISTS idx_worker_tasks_pending
ON worker_tasks(role, priority DESC, created_at ASC)
WHERE status = 'pending';

CREATE INDEX IF NOT EXISTS idx_worker_tasks_claimed
ON worker_tasks(worker_id, claimed_at)
WHERE status = 'claimed';

-- Supports recover_stale_tasks()'s heartbeat scan.
CREATE INDEX IF NOT EXISTS idx_worker_tasks_running
ON worker_tasks(worker_id, last_heartbeat_at)
WHERE status = 'running';

CREATE INDEX IF NOT EXISTS idx_worker_tasks_dispensary
ON worker_tasks(dispensary_id)
WHERE dispensary_id IS NOT NULL;

CREATE INDEX IF NOT EXISTS idx_worker_tasks_scheduled
ON worker_tasks(scheduled_for)
WHERE status = 'pending' AND scheduled_for IS NOT NULL;

CREATE INDEX IF NOT EXISTS idx_worker_tasks_history
ON worker_tasks(role, completed_at DESC)
WHERE status IN ('completed', 'failed');

-- Partial unique index to prevent duplicate active tasks per store
-- Only one task can be claimed/running for a given dispensary at a time
CREATE UNIQUE INDEX IF NOT EXISTS idx_worker_tasks_unique_active_store
ON worker_tasks(dispensary_id)
WHERE status IN ('claimed', 'running') AND dispensary_id IS NOT NULL;

-- Worker registration table (tracks active workers)
-- NOTE(review): 076_worker_registry.sql also defines worker_registry with a
-- different column set. Both use IF NOT EXISTS, so whichever migration runs
-- first determines the actual shape — confirm and consolidate.
CREATE TABLE IF NOT EXISTS worker_registry (
    id SERIAL PRIMARY KEY,
    worker_id VARCHAR(100) UNIQUE NOT NULL,
    role VARCHAR(50) NOT NULL,
    pod_name VARCHAR(100),
    hostname VARCHAR(100),
    started_at TIMESTAMPTZ DEFAULT NOW(),
    last_heartbeat_at TIMESTAMPTZ DEFAULT NOW(),
    tasks_completed INTEGER DEFAULT 0,
    tasks_failed INTEGER DEFAULT 0,
    status VARCHAR(20) DEFAULT 'active',

    CONSTRAINT valid_worker_status CHECK (status IN ('active', 'idle', 'offline'))
);

CREATE INDEX IF NOT EXISTS idx_worker_registry_role
ON worker_registry(role, status);

CREATE INDEX IF NOT EXISTS idx_worker_registry_heartbeat
ON worker_registry(last_heartbeat_at)
WHERE status = 'active';

-- Task completion tracking (summarized history)
-- One row per (role, date, hour); metrics are upserted against the UNIQUE key.
CREATE TABLE IF NOT EXISTS task_completion_log (
    id SERIAL PRIMARY KEY,
    role VARCHAR(50) NOT NULL,
    date DATE NOT NULL DEFAULT CURRENT_DATE,
    hour INTEGER NOT NULL DEFAULT EXTRACT(HOUR FROM NOW()),

    tasks_created INTEGER DEFAULT 0,
    tasks_completed INTEGER DEFAULT 0,
    tasks_failed INTEGER DEFAULT 0,

    -- durations in seconds
    avg_duration_sec NUMERIC(10,2),
    min_duration_sec NUMERIC(10,2),
    max_duration_sec NUMERIC(10,2),

    updated_at TIMESTAMPTZ DEFAULT NOW(),

    UNIQUE(role, date, hour)
);
|
||||||
|
|
||||||
|
-- Capacity planning view
-- One row per task role, aggregating live queue depth, last-hour throughput,
-- and derived capacity estimates from worker_tasks.
CREATE OR REPLACE VIEW v_worker_capacity AS
SELECT
    role,
    COUNT(*) FILTER (WHERE status = 'pending') as pending_tasks,
    -- "ready" = pending AND not scheduled for the future
    COUNT(*) FILTER (WHERE status = 'pending' AND (scheduled_for IS NULL OR scheduled_for <= NOW())) as ready_tasks,
    COUNT(*) FILTER (WHERE status = 'claimed') as claimed_tasks,
    COUNT(*) FILTER (WHERE status = 'running') as running_tasks,
    COUNT(*) FILTER (WHERE status = 'completed' AND completed_at > NOW() - INTERVAL '1 hour') as completed_last_hour,
    COUNT(*) FILTER (WHERE status = 'failed' AND completed_at > NOW() - INTERVAL '1 hour') as failed_last_hour,
    -- Workers currently holding a task for this role
    COUNT(DISTINCT worker_id) FILTER (WHERE status IN ('claimed', 'running')) as active_workers,
    -- Mean wall-clock seconds per completed task over the last hour
    AVG(EXTRACT(EPOCH FROM (completed_at - started_at)))
        FILTER (WHERE status = 'completed' AND completed_at > NOW() - INTERVAL '1 hour') as avg_duration_sec,
    -- Capacity planning metrics
    -- tasks_per_worker_hour = 3600 / avg task duration; NULL when no completions
    CASE
        WHEN COUNT(*) FILTER (WHERE status = 'completed' AND completed_at > NOW() - INTERVAL '1 hour') > 0
        THEN 3600.0 / NULLIF(AVG(EXTRACT(EPOCH FROM (completed_at - started_at)))
            FILTER (WHERE status = 'completed' AND completed_at > NOW() - INTERVAL '1 hour'), 0)
        ELSE NULL
    END as tasks_per_worker_hour,
    -- Estimated time to drain queue
    -- pending / (active_workers * tasks_per_worker_hour); NULL when either
    -- there are no active workers or no last-hour completions to rate from.
    CASE
        WHEN COUNT(DISTINCT worker_id) FILTER (WHERE status IN ('claimed', 'running')) > 0
            AND COUNT(*) FILTER (WHERE status = 'completed' AND completed_at > NOW() - INTERVAL '1 hour') > 0
        THEN COUNT(*) FILTER (WHERE status = 'pending') / NULLIF(
            COUNT(DISTINCT worker_id) FILTER (WHERE status IN ('claimed', 'running')) *
            (3600.0 / NULLIF(AVG(EXTRACT(EPOCH FROM (completed_at - started_at)))
                FILTER (WHERE status = 'completed' AND completed_at > NOW() - INTERVAL '1 hour'), 0)),
            0
        )
        ELSE NULL
    END as estimated_hours_to_drain
FROM worker_tasks
GROUP BY role;
|
||||||
|
|
||||||
|
-- Task history view (for UI)
-- Flattens worker_tasks with the dispensary name; LEFT JOIN keeps tasks whose
-- dispensary_id is NULL (non-store-scoped roles) or whose dispensary was deleted.
CREATE OR REPLACE VIEW v_task_history AS
SELECT
    t.id,
    t.role,
    t.dispensary_id,
    d.name as dispensary_name,
    t.platform,
    t.status,
    t.priority,
    t.worker_id,
    t.scheduled_for,
    t.claimed_at,
    t.started_at,
    t.completed_at,
    t.error_message,
    t.retry_count,
    t.created_at,
    -- NULL until both started_at and completed_at are set
    EXTRACT(EPOCH FROM (t.completed_at - t.started_at)) as duration_sec
FROM worker_tasks t
LEFT JOIN dispensaries d ON d.id = t.dispensary_id
ORDER BY t.created_at DESC;
|
||||||
|
|
||||||
|
-- Function to claim a task atomically
-- Picks the highest-priority, oldest, due pending task for the given role,
-- marks it claimed by p_worker_id, and returns the claimed row (NULL-filled
-- record when nothing is claimable). FOR UPDATE SKIP LOCKED lets concurrent
-- workers claim different rows without blocking each other.
CREATE OR REPLACE FUNCTION claim_task(
    p_role VARCHAR(50),
    p_worker_id VARCHAR(100)
) RETURNS worker_tasks AS $$
DECLARE
    claimed_task worker_tasks;
BEGIN
    UPDATE worker_tasks
    SET
        status = 'claimed',
        worker_id = p_worker_id,
        claimed_at = NOW(),
        updated_at = NOW()
    WHERE id = (
        SELECT id FROM worker_tasks
        WHERE role = p_role
            AND status = 'pending'
            -- Tasks with a future scheduled_for are not yet due
            AND (scheduled_for IS NULL OR scheduled_for <= NOW())
            -- Exclude stores that already have an active task
            -- (subquery filters out NULL dispensary_ids, so NOT IN is NULL-safe here)
            AND (dispensary_id IS NULL OR dispensary_id NOT IN (
                SELECT dispensary_id FROM worker_tasks
                WHERE status IN ('claimed', 'running')
                    AND dispensary_id IS NOT NULL
            ))
        ORDER BY priority DESC, created_at ASC
        LIMIT 1
        FOR UPDATE SKIP LOCKED
        -- NOTE(review): two concurrent claims for *different* pending tasks of
        -- the same dispensary can both pass the NOT IN check; the partial
        -- unique index idx_worker_tasks_unique_active_store then makes the
        -- loser's UPDATE raise a unique violation rather than return NULL —
        -- confirm callers tolerate/retry that error.
    )
    RETURNING * INTO claimed_task;

    RETURN claimed_task;
END;
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
-- Function to mark stale tasks (workers that died)
-- A task is "stale" when claimed/running but its heartbeat is older than the
-- threshold. Stale tasks with retries remaining are returned to pending
-- (retry_count incremented); those out of retries are marked failed.
-- Returns the number of tasks put back to pending.
CREATE OR REPLACE FUNCTION recover_stale_tasks(
    stale_threshold_minutes INTEGER DEFAULT 10
) RETURNS INTEGER AS $$
DECLARE
    recovered_count INTEGER;
BEGIN
    -- Requeue: data-modifying CTE lets us count the released rows in one pass
    WITH stale AS (
        UPDATE worker_tasks
        SET
            status = 'pending',
            worker_id = NULL,
            claimed_at = NULL,
            started_at = NULL,
            retry_count = retry_count + 1,
            updated_at = NOW()
        WHERE status IN ('claimed', 'running')
            AND last_heartbeat_at < NOW() - (stale_threshold_minutes || ' minutes')::INTERVAL
            AND retry_count < max_retries
        RETURNING id
    )
    SELECT COUNT(*) INTO recovered_count FROM stale;

    -- Mark tasks that exceeded retries as failed
    UPDATE worker_tasks
    SET
        status = 'failed',
        error_message = 'Exceeded max retries after worker failures',
        completed_at = NOW(),
        updated_at = NOW()
    WHERE status IN ('claimed', 'running')
        AND last_heartbeat_at < NOW() - (stale_threshold_minutes || ' minutes')::INTERVAL
        AND retry_count >= max_retries;

    RETURN recovered_count;
END;
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
-- Function to generate daily resync tasks
-- Splits all crawl-enabled Dutchie stores into p_batches_per_day batches and
-- schedules one product_resync task per store, with batches spread evenly
-- across the 24 hours of p_date. Returns the number of tasks created.
CREATE OR REPLACE FUNCTION generate_resync_tasks(
    p_batches_per_day INTEGER DEFAULT 6, -- Every 4 hours
    p_date DATE DEFAULT CURRENT_DATE
) RETURNS INTEGER AS $$
DECLARE
    store_count INTEGER;
    stores_per_batch INTEGER;
    batch_num INTEGER;
    scheduled_time TIMESTAMPTZ;
    batch_created INTEGER;        -- rows inserted for the current batch
    created_count INTEGER := 0;   -- running total across all batches
BEGIN
    -- Count active stores that need resync
    SELECT COUNT(*) INTO store_count
    FROM dispensaries
    WHERE crawl_enabled = true
      AND menu_type = 'dutchie'
      AND platform_dispensary_id IS NOT NULL;

    IF store_count = 0 THEN
        RETURN 0;
    END IF;

    stores_per_batch := CEIL(store_count::NUMERIC / p_batches_per_day);

    FOR batch_num IN 0..(p_batches_per_day - 1) LOOP
        -- FIX: derive spacing from p_batches_per_day (24h / batches) instead of
        -- a hard-coded 4 hours, which spilled past the target day whenever
        -- p_batches_per_day > 6. Identical to the original for the default of 6.
        scheduled_time := p_date + (batch_num * (24.0 / p_batches_per_day) || ' hours')::INTERVAL;

        INSERT INTO worker_tasks (role, dispensary_id, platform, scheduled_for, priority)
        SELECT
            'product_resync',
            d.id,
            'dutchie',
            scheduled_time,
            0
        FROM (
            -- Stable ordering so each store lands in exactly one batch
            SELECT id, ROW_NUMBER() OVER (ORDER BY id) as rn
            FROM dispensaries
            WHERE crawl_enabled = true
              AND menu_type = 'dutchie'
              AND platform_dispensary_id IS NOT NULL
        ) d
        WHERE d.rn > (batch_num * stores_per_batch)
          AND d.rn <= ((batch_num + 1) * stores_per_batch)
        ON CONFLICT DO NOTHING;

        -- BUG FIX: GET DIAGNOSTICS only assigns an item to a variable; the
        -- original "GET DIAGNOSTICS created_count = created_count + ROW_COUNT"
        -- is a PL/pgSQL syntax error. Capture ROW_COUNT, then accumulate.
        GET DIAGNOSTICS batch_created = ROW_COUNT;
        created_count := created_count + batch_created;
    END LOOP;

    RETURN created_count;
END;
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
-- Trigger to update timestamp
-- Keeps worker_tasks.updated_at current on every UPDATE, regardless of caller.
CREATE OR REPLACE FUNCTION update_worker_tasks_timestamp()
RETURNS TRIGGER AS $$
BEGIN
    NEW.updated_at = NOW();
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- DROP first so re-running this migration is idempotent.
DROP TRIGGER IF EXISTS worker_tasks_updated_at ON worker_tasks;
CREATE TRIGGER worker_tasks_updated_at
    BEFORE UPDATE ON worker_tasks
    FOR EACH ROW
    EXECUTE FUNCTION update_worker_tasks_timestamp();

-- Comments
COMMENT ON TABLE worker_tasks IS 'Central task queue for all worker roles';
COMMENT ON TABLE worker_registry IS 'Registry of active workers and their stats';
COMMENT ON TABLE task_completion_log IS 'Hourly aggregated task completion metrics';
COMMENT ON VIEW v_worker_capacity IS 'Real-time capacity planning metrics per role';
COMMENT ON VIEW v_task_history IS 'Task history with dispensary details for UI';
COMMENT ON FUNCTION claim_task IS 'Atomically claim a task for a worker, respecting per-store locking';
COMMENT ON FUNCTION recover_stale_tasks IS 'Release tasks from dead workers back to pending';
COMMENT ON FUNCTION generate_resync_tasks IS 'Generate daily product resync tasks in batches';
|
||||||
13
backend/migrations/075_consecutive_misses.sql
Normal file
13
backend/migrations/075_consecutive_misses.sql
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
-- Migration 075: Add consecutive_misses column to store_products
-- Used to track how many consecutive crawls a product has been missing from the feed
-- After 3 consecutive misses, product is marked as OOS

ALTER TABLE store_products
ADD COLUMN IF NOT EXISTS consecutive_misses INTEGER NOT NULL DEFAULT 0;

-- Index for finding products that need OOS check
-- Partial index: most rows have consecutive_misses = 0 and are excluded.
CREATE INDEX IF NOT EXISTS idx_store_products_consecutive_misses
ON store_products (dispensary_id, consecutive_misses)
WHERE consecutive_misses > 0;

COMMENT ON COLUMN store_products.consecutive_misses IS 'Number of consecutive crawls where product was not in feed. Reset to 0 when seen. At 3, mark OOS.';
|
||||||
71
backend/migrations/076_visitor_analytics.sql
Normal file
71
backend/migrations/076_visitor_analytics.sql
Normal file
@@ -0,0 +1,71 @@
|
|||||||
|
-- Visitor location analytics for Findagram
-- Tracks visitor locations to understand popular areas

CREATE TABLE IF NOT EXISTS visitor_locations (
    id SERIAL PRIMARY KEY,

    -- Location data (from IP lookup)
    ip_hash VARCHAR(64), -- Hashed IP for privacy (SHA256)
    city VARCHAR(100),
    state VARCHAR(100),
    state_code VARCHAR(10),
    country VARCHAR(100),
    country_code VARCHAR(10),
    latitude DECIMAL(10, 7),
    longitude DECIMAL(10, 7),

    -- Visit metadata
    domain VARCHAR(50) NOT NULL, -- 'findagram.co', 'findadispo.com', etc.
    page_path VARCHAR(255), -- '/products', '/dispensaries/123', etc.
    referrer VARCHAR(500),
    user_agent VARCHAR(500),

    -- Session tracking
    session_id VARCHAR(64), -- For grouping page views in a session

    -- Timestamps
    created_at TIMESTAMPTZ DEFAULT NOW()
);

-- Indexes for analytics queries
CREATE INDEX IF NOT EXISTS idx_visitor_locations_domain ON visitor_locations(domain);
CREATE INDEX IF NOT EXISTS idx_visitor_locations_city_state ON visitor_locations(city, state_code);
CREATE INDEX IF NOT EXISTS idx_visitor_locations_created_at ON visitor_locations(created_at);
CREATE INDEX IF NOT EXISTS idx_visitor_locations_session ON visitor_locations(session_id);

-- Aggregated daily stats (materialized for performance)
-- NOTE(review): city/state_code/country_code are nullable and NULLs compare
-- distinct in a UNIQUE constraint, so rows with NULL location parts can
-- duplicate and ON CONFLICT upserts will not match them — confirm writers
-- normalize NULLs (e.g. to '') before relying on this key.
CREATE TABLE IF NOT EXISTS visitor_location_stats (
    id SERIAL PRIMARY KEY,
    date DATE NOT NULL,
    domain VARCHAR(50) NOT NULL,
    city VARCHAR(100),
    state VARCHAR(100),
    state_code VARCHAR(10),
    country_code VARCHAR(10),

    -- Metrics
    visit_count INTEGER DEFAULT 0,
    unique_sessions INTEGER DEFAULT 0,

    UNIQUE(date, domain, city, state_code, country_code)
);

CREATE INDEX IF NOT EXISTS idx_visitor_stats_date ON visitor_location_stats(date);
CREATE INDEX IF NOT EXISTS idx_visitor_stats_domain ON visitor_location_stats(domain);
CREATE INDEX IF NOT EXISTS idx_visitor_stats_state ON visitor_location_stats(state_code);

-- View for easy querying of top locations
-- Rolling 30-day window over raw visitor_locations rows.
CREATE OR REPLACE VIEW v_top_visitor_locations AS
SELECT
    domain,
    city,
    state,
    state_code,
    country_code,
    COUNT(*) as total_visits,
    COUNT(DISTINCT session_id) as unique_sessions,
    MAX(created_at) as last_visit
FROM visitor_locations
WHERE created_at > NOW() - INTERVAL '30 days'
GROUP BY domain, city, state, state_code, country_code
ORDER BY total_visits DESC;
|
||||||
141
backend/migrations/076_worker_registry.sql
Normal file
141
backend/migrations/076_worker_registry.sql
Normal file
@@ -0,0 +1,141 @@
|
|||||||
|
-- Migration 076: Worker Registry for Dynamic Workers
-- Workers register on startup, receive a friendly name, and report heartbeats

-- Name pool for workers (expandable, no hardcoding)
CREATE TABLE IF NOT EXISTS worker_name_pool (
    id SERIAL PRIMARY KEY,
    name VARCHAR(50) UNIQUE NOT NULL,
    in_use BOOLEAN DEFAULT FALSE,
    assigned_to VARCHAR(100), -- worker_id
    assigned_at TIMESTAMPTZ,
    created_at TIMESTAMPTZ DEFAULT NOW()
);

-- Seed with initial names (can add more via API)
-- ON CONFLICT makes the seed idempotent across re-runs.
INSERT INTO worker_name_pool (name) VALUES
    ('Alice'), ('Bella'), ('Clara'), ('Diana'), ('Elena'),
    ('Fiona'), ('Grace'), ('Hazel'), ('Iris'), ('Julia'),
    ('Katie'), ('Luna'), ('Mia'), ('Nora'), ('Olive'),
    ('Pearl'), ('Quinn'), ('Rosa'), ('Sara'), ('Tara'),
    ('Uma'), ('Vera'), ('Wendy'), ('Xena'), ('Yuki'), ('Zara'),
    ('Amber'), ('Blake'), ('Coral'), ('Dawn'), ('Echo'),
    ('Fleur'), ('Gem'), ('Haven'), ('Ivy'), ('Jade'),
    ('Kira'), ('Lotus'), ('Maple'), ('Nova'), ('Onyx'),
    ('Pixel'), ('Quest'), ('Raven'), ('Sage'), ('Terra'),
    ('Unity'), ('Violet'), ('Willow'), ('Xylo'), ('Yara'), ('Zen')
ON CONFLICT (name) DO NOTHING;
|
||||||
|
|
||||||
|
-- Worker registry - tracks active workers
-- NOTE(review): 074_worker_task_queue.sql already creates a worker_registry
-- with a different column set; because of IF NOT EXISTS, this definition is a
-- no-op if that migration ran first, leaving friendly_name/ip_address/etc.
-- missing — confirm and consolidate into one definition.
CREATE TABLE IF NOT EXISTS worker_registry (
    id SERIAL PRIMARY KEY,
    worker_id VARCHAR(100) UNIQUE NOT NULL, -- e.g., "pod-abc123" or uuid
    friendly_name VARCHAR(50), -- assigned from pool
    role VARCHAR(50) NOT NULL, -- task role
    pod_name VARCHAR(100), -- k8s pod name
    hostname VARCHAR(100), -- machine hostname
    ip_address VARCHAR(50), -- worker IP
    status VARCHAR(20) DEFAULT 'starting', -- starting, active, idle, offline, terminated
    started_at TIMESTAMPTZ DEFAULT NOW(),
    last_heartbeat_at TIMESTAMPTZ DEFAULT NOW(),
    last_task_at TIMESTAMPTZ,
    tasks_completed INTEGER DEFAULT 0,
    tasks_failed INTEGER DEFAULT 0,
    current_task_id INTEGER,
    metadata JSONB DEFAULT '{}',
    created_at TIMESTAMPTZ DEFAULT NOW(),
    updated_at TIMESTAMPTZ DEFAULT NOW()
);

-- Indexes for worker registry
CREATE INDEX IF NOT EXISTS idx_worker_registry_status ON worker_registry(status);
CREATE INDEX IF NOT EXISTS idx_worker_registry_role ON worker_registry(role);
CREATE INDEX IF NOT EXISTS idx_worker_registry_heartbeat ON worker_registry(last_heartbeat_at);
|
||||||
|
|
||||||
|
-- Function to assign a name to a new worker
-- Atomically grabs a random unused name from worker_name_pool and marks it
-- assigned to p_worker_id. FOR UPDATE SKIP LOCKED keeps concurrent workers
-- from racing over the same row.
CREATE OR REPLACE FUNCTION assign_worker_name(p_worker_id VARCHAR(100))
RETURNS VARCHAR(50) AS $$
DECLARE
    v_name VARCHAR(50);
BEGIN
    -- Try to get an unused name
    UPDATE worker_name_pool
    SET in_use = TRUE, assigned_to = p_worker_id, assigned_at = NOW()
    WHERE id = (
        SELECT id FROM worker_name_pool
        WHERE in_use = FALSE
        ORDER BY RANDOM()
        LIMIT 1
        FOR UPDATE SKIP LOCKED
    )
    RETURNING name INTO v_name;

    -- If no names available, generate one
    -- NOTE(review): the fallback is derived from the first 8 chars of
    -- p_worker_id and is not recorded in the pool, so uniqueness depends on
    -- worker_id prefixes differing — confirm that holds for pod naming.
    IF v_name IS NULL THEN
        v_name := 'Worker-' || SUBSTRING(p_worker_id FROM 1 FOR 8);
    END IF;

    RETURN v_name;
END;
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
-- Function to release a worker's name back to the pool
-- Clears the assignment so the name becomes available to future workers.
-- No-op if the worker holds no pooled name (e.g. it got a generated fallback).
CREATE OR REPLACE FUNCTION release_worker_name(p_worker_id VARCHAR(100))
RETURNS VOID AS $$
BEGIN
    UPDATE worker_name_pool
    SET in_use = FALSE, assigned_to = NULL, assigned_at = NULL
    WHERE assigned_to = p_worker_id;
END;
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
-- Function to mark stale workers as offline
-- A worker is stale when still registered as active/idle/starting but its
-- heartbeat is older than the threshold. Also releases pooled names from
-- workers that have been offline for over 30 minutes.
-- Returns the number of workers newly marked offline.
CREATE OR REPLACE FUNCTION mark_stale_workers(stale_threshold_minutes INTEGER DEFAULT 5)
RETURNS INTEGER AS $$
DECLARE
    v_count INTEGER;
BEGIN
    UPDATE worker_registry
    SET status = 'offline', updated_at = NOW()
    WHERE status IN ('active', 'idle', 'starting')
      AND last_heartbeat_at < NOW() - (stale_threshold_minutes || ' minutes')::INTERVAL;

    -- BUG FIX: the original used "RETURNING COUNT(*) INTO v_count", which is
    -- invalid — aggregate functions are not allowed in a RETURNING list.
    -- GET DIAGNOSTICS is the supported way to read the affected-row count.
    GET DIAGNOSTICS v_count = ROW_COUNT;

    -- Release names from offline workers
    PERFORM release_worker_name(worker_id)
    FROM worker_registry
    WHERE status = 'offline'
      AND last_heartbeat_at < NOW() - INTERVAL '30 minutes';

    RETURN COALESCE(v_count, 0);
END;
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
-- View for dashboard
-- All non-terminated workers with a derived health_status:
-- offline > stale (no heartbeat for 2 min) > busy (holding a task) > ready.
CREATE OR REPLACE VIEW v_active_workers AS
SELECT
    wr.id,
    wr.worker_id,
    wr.friendly_name,
    wr.role,
    wr.status,
    wr.pod_name,
    wr.hostname,
    wr.started_at,
    wr.last_heartbeat_at,
    wr.last_task_at,
    wr.tasks_completed,
    wr.tasks_failed,
    wr.current_task_id,
    EXTRACT(EPOCH FROM (NOW() - wr.last_heartbeat_at)) as seconds_since_heartbeat,
    CASE
        WHEN wr.status = 'offline' THEN 'offline'
        WHEN wr.last_heartbeat_at < NOW() - INTERVAL '2 minutes' THEN 'stale'
        WHEN wr.current_task_id IS NOT NULL THEN 'busy'
        ELSE 'ready'
    END as health_status
FROM worker_registry wr
WHERE wr.status != 'terminated'
-- Active workers first, then most recent heartbeat
ORDER BY wr.status = 'active' DESC, wr.last_heartbeat_at DESC;

COMMENT ON TABLE worker_registry IS 'Tracks all workers that have registered with the system';
COMMENT ON TABLE worker_name_pool IS 'Pool of friendly names for workers - expandable via API';
|
||||||
35
backend/migrations/077_click_events_location.sql
Normal file
35
backend/migrations/077_click_events_location.sql
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
-- Migration: Add visitor location and dispensary name to click events
-- Captures where visitors are clicking from and which dispensary

-- Add visitor location columns (all additive and idempotent)
ALTER TABLE product_click_events
ADD COLUMN IF NOT EXISTS visitor_city VARCHAR(100);

ALTER TABLE product_click_events
ADD COLUMN IF NOT EXISTS visitor_state VARCHAR(10);

ALTER TABLE product_click_events
ADD COLUMN IF NOT EXISTS visitor_lat DECIMAL(10, 7);

ALTER TABLE product_click_events
ADD COLUMN IF NOT EXISTS visitor_lng DECIMAL(10, 7);

-- Add dispensary name for easier reporting
-- (intentional denormalization; source of truth stays in dispensaries.name)
ALTER TABLE product_click_events
ADD COLUMN IF NOT EXISTS dispensary_name VARCHAR(255);

-- Create index for location-based analytics
-- Partial indexes skip rows with no geolocation data.
CREATE INDEX IF NOT EXISTS idx_product_click_events_visitor_state
ON product_click_events(visitor_state)
WHERE visitor_state IS NOT NULL;

CREATE INDEX IF NOT EXISTS idx_product_click_events_visitor_city
ON product_click_events(visitor_city)
WHERE visitor_city IS NOT NULL;

-- Add comments
COMMENT ON COLUMN product_click_events.visitor_city IS 'City where the visitor is located (from IP geolocation)';
COMMENT ON COLUMN product_click_events.visitor_state IS 'State where the visitor is located (from IP geolocation)';
COMMENT ON COLUMN product_click_events.visitor_lat IS 'Visitor latitude (from IP geolocation)';
COMMENT ON COLUMN product_click_events.visitor_lng IS 'Visitor longitude (from IP geolocation)';
COMMENT ON COLUMN product_click_events.dispensary_name IS 'Name of the dispensary (denormalized for easier reporting)';
|
||||||
8
backend/migrations/078_proxy_consecutive_403.sql
Normal file
8
backend/migrations/078_proxy_consecutive_403.sql
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
-- Migration 078: Add consecutive_403_count to proxies table
-- Per workflow-12102025.md: Track consecutive 403s per proxy
-- After 3 consecutive 403s with different fingerprints -> disable proxy

ALTER TABLE proxies ADD COLUMN IF NOT EXISTS consecutive_403_count INTEGER DEFAULT 0;

-- Add comment explaining the column
COMMENT ON COLUMN proxies.consecutive_403_count IS 'Tracks consecutive 403 blocks. Reset to 0 on success. Proxy disabled at 3.';
|
||||||
49
backend/migrations/079_task_schedules.sql
Normal file
49
backend/migrations/079_task_schedules.sql
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
-- Migration 079: Task Schedules for Database-Driven Scheduler
-- Per TASK_WORKFLOW_2024-12-10.md: Replaces node-cron with DB-driven scheduling
--
-- 2024-12-10: Created for reliable, multi-replica-safe task scheduling

-- task_schedules: Stores schedule definitions and state
CREATE TABLE IF NOT EXISTS task_schedules (
    id SERIAL PRIMARY KEY,
    name VARCHAR(100) NOT NULL UNIQUE,
    role VARCHAR(50) NOT NULL, -- TaskRole: product_refresh, store_discovery, etc.
    description TEXT,

    -- Schedule configuration
    enabled BOOLEAN DEFAULT TRUE,
    interval_hours INTEGER NOT NULL DEFAULT 4,
    priority INTEGER DEFAULT 0,

    -- Optional scope filters
    state_code VARCHAR(2), -- NULL = all states
    platform VARCHAR(50), -- NULL = all platforms

    -- Execution state (updated by scheduler)
    last_run_at TIMESTAMPTZ,
    next_run_at TIMESTAMPTZ,
    last_task_count INTEGER DEFAULT 0,
    last_error TEXT,

    created_at TIMESTAMPTZ DEFAULT NOW(),
    updated_at TIMESTAMPTZ DEFAULT NOW()
);

-- Indexes for scheduler queries
CREATE INDEX IF NOT EXISTS idx_task_schedules_enabled ON task_schedules(enabled) WHERE enabled = TRUE;
-- Supports the scheduler's "due now" poll (next_run_at <= NOW()).
CREATE INDEX IF NOT EXISTS idx_task_schedules_next_run ON task_schedules(next_run_at) WHERE enabled = TRUE;

-- Insert default schedules
-- next_run_at = NOW() so each schedule fires on the first scheduler poll.
INSERT INTO task_schedules (name, role, interval_hours, priority, description, next_run_at)
VALUES
    ('product_refresh_all', 'product_refresh', 4, 0, 'Generate product refresh tasks for all crawl-enabled stores every 4 hours', NOW()),
    ('store_discovery_dutchie', 'store_discovery', 24, 5, 'Discover new Dutchie stores daily', NOW()),
    ('analytics_refresh', 'analytics_refresh', 6, 0, 'Refresh analytics materialized views every 6 hours', NOW())
ON CONFLICT (name) DO NOTHING;

-- Comment for documentation
COMMENT ON TABLE task_schedules IS 'Database-driven task scheduler configuration. Per TASK_WORKFLOW_2024-12-10.md:
- Schedules persist in DB (survive restarts)
- Uses SELECT FOR UPDATE SKIP LOCKED for multi-replica safety
- Scheduler polls every 60s and executes due schedules
- Creates tasks in worker_tasks for task-worker.ts to process';
|
||||||
-- ============================================================================
-- File: backend/migrations/080_raw_crawl_payloads.sql (new file, 58 lines)
-- ============================================================================
|
-- Migration 080: Raw Crawl Payloads Metadata Table
-- Per TASK_WORKFLOW_2024-12-10.md: Store full GraphQL payloads for historical analysis.
--
-- Design pattern: metadata/payload separation.
--   * Metadata (this table): small, indexed, queryable.
--   * Payload (filesystem): gzipped JSON at storage_path.
--
-- Why keep raw payloads at all:
--   * diff any two crawls to see what changed;
--   * replay / re-normalize historical data when normalization logic changes;
--   * debug by inspecting exactly what the API returned;
--   * the database stays small and backups stay fast.
--
-- Storage layout: /storage/payloads/{year}/{month}/{day}/store_{id}_{timestamp}.json.gz
-- Compression: roughly 90% reduction (~1.5MB -> ~150KB per crawl).

CREATE TABLE IF NOT EXISTS raw_crawl_payloads (
    id SERIAL PRIMARY KEY,

    -- Links back to crawl tracking. A payload outlives its crawl_runs row
    -- (SET NULL) but is deleted together with its dispensary (CASCADE).
    crawl_run_id INTEGER REFERENCES crawl_runs(id) ON DELETE SET NULL,
    dispensary_id INTEGER NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE,

    -- Location of the gzipped JSON file on disk.
    storage_path TEXT NOT NULL,

    -- Summary metadata so common queries never need to open the file.
    product_count INTEGER NOT NULL DEFAULT 0,
    size_bytes INTEGER,       -- compressed size in bytes
    size_bytes_raw INTEGER,   -- uncompressed size in bytes

    -- Timestamps.
    fetched_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),

    -- Optional integrity checksum of the stored file.
    checksum_sha256 VARCHAR(64)
);
|
||||||
|
|
||||||
|
-- Indexes for common queries.
--
-- NOTE: the original migration also created idx_raw_crawl_payloads_dispensary
-- on (dispensary_id) alone. It is omitted here: a btree index on
-- (dispensary_id, fetched_at DESC) already serves lookups on its leading
-- column dispensary_id, so the single-column index duplicated coverage and
-- only added write amplification on every insert.

-- Per-store history, newest first ("latest payload for store X").
CREATE INDEX IF NOT EXISTS idx_raw_crawl_payloads_dispensary_fetched
    ON raw_crawl_payloads(dispensary_id, fetched_at DESC);

-- Global recency scans ("latest payloads across all stores").
CREATE INDEX IF NOT EXISTS idx_raw_crawl_payloads_fetched
    ON raw_crawl_payloads(fetched_at DESC);

-- Partial index: only payloads that are linked to a crawl run.
CREATE INDEX IF NOT EXISTS idx_raw_crawl_payloads_crawl_run
    ON raw_crawl_payloads(crawl_run_id)
    WHERE crawl_run_id IS NOT NULL;

-- Documentation comments.
COMMENT ON TABLE raw_crawl_payloads IS 'Metadata for raw GraphQL payloads stored on filesystem. Per TASK_WORKFLOW_2024-12-10.md: Full payloads enable historical diffs and replay.';
COMMENT ON COLUMN raw_crawl_payloads.storage_path IS 'Path to gzipped JSON file, e.g. /storage/payloads/2024/12/10/store_123_1702234567.json.gz';
COMMENT ON COLUMN raw_crawl_payloads.size_bytes IS 'Compressed file size in bytes';
COMMENT ON COLUMN raw_crawl_payloads.size_bytes_raw IS 'Uncompressed payload size in bytes';
-- ============================================================================
-- File: backend/migrations/081_payload_fetch_columns.sql (new file, 37 lines)
-- ============================================================================
|
-- Migration 081: Payload Fetch Columns
-- Per TASK_WORKFLOW_2024-12-10.md: Separates API fetch from data processing.
--
-- New architecture:
--   * payload_fetch   -- hits the Dutchie API, saves the raw payload to disk;
--   * product_refresh -- reads the local payload, normalizes, upserts to DB.
--
-- This migration adds:
--   1. worker_tasks.payload           -- data passed along a task chain;
--   2. raw_crawl_payloads.processed_at -- when a payload was processed;
--   3. dispensaries.last_fetch_at      -- when the last payload was fetched.

-- Task-chaining data: payload_fetch uses this to hand payload_id to product_refresh.
ALTER TABLE worker_tasks
    ADD COLUMN IF NOT EXISTS payload JSONB DEFAULT NULL;

COMMENT ON COLUMN worker_tasks.payload IS 'Per TASK_WORKFLOW_2024-12-10.md: Task chaining data (e.g., payload_id from payload_fetch to product_refresh)';

-- Processing marker: set by the product_refresh handler once a payload is consumed.
ALTER TABLE raw_crawl_payloads
    ADD COLUMN IF NOT EXISTS processed_at TIMESTAMPTZ DEFAULT NULL;

COMMENT ON COLUMN raw_crawl_payloads.processed_at IS 'When this payload was processed by product_refresh handler';

-- Partial index for locating unprocessed payloads per store, newest first.
CREATE INDEX IF NOT EXISTS idx_raw_crawl_payloads_unprocessed
    ON raw_crawl_payloads(dispensary_id, fetched_at DESC)
    WHERE processed_at IS NULL;

-- Fetch marker on dispensaries, distinct from last_crawl_at (processing time).
ALTER TABLE dispensaries
    ADD COLUMN IF NOT EXISTS last_fetch_at TIMESTAMPTZ DEFAULT NULL;

COMMENT ON COLUMN dispensaries.last_fetch_at IS 'Per TASK_WORKFLOW_2024-12-10.md: When last payload was fetched from API (separate from last_crawl_at which is when processing completed)';
-- ============================================================================
-- File: backend/migrations/082_proxy_notification_trigger.sql (new file, 27 lines)
-- ============================================================================
|
-- Migration: 082_proxy_notification_trigger
-- Date: 2024-12-11
-- Description: PostgreSQL NOTIFY trigger so workers learn immediately when a
-- proxy is added or (re)activated, instead of waiting for their next poll.

-- Trigger function: emit a notification carrying the proxy id whenever the
-- affected row is active.
-- NOTE(review): because the trigger below fires on UPDATE OF active, an UPDATE
-- that sets active = true on an already-active row still notifies; workers
-- should treat the notification as a wake-up hint, not a state change.
CREATE OR REPLACE FUNCTION notify_proxy_added()
RETURNS TRIGGER AS $$
BEGIN
    IF NEW.active = true THEN
        -- Channel payload is the proxy's id as text.
        PERFORM pg_notify('proxy_added', NEW.id::text);
    END IF;
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- Recreate the trigger idempotently.
DROP TRIGGER IF EXISTS proxy_added_trigger ON proxies;

-- Fire on new proxies and on any update that touches the active column.
CREATE TRIGGER proxy_added_trigger
    AFTER INSERT OR UPDATE OF active ON proxies
    FOR EACH ROW
    EXECUTE FUNCTION notify_proxy_added();

COMMENT ON FUNCTION notify_proxy_added() IS
'Sends PostgreSQL NOTIFY to proxy_added channel when an active proxy is added or activated. Workers LISTEN on this channel to wake up immediately.';
-- ============================================================================
-- File: backend/migrations/083_discovery_runs.sql (new file, 88 lines)
-- ============================================================================
|
-- Migration 083: Discovery Run Tracking
-- Records store-discovery runs at three granularities: the run itself,
-- per-state progress within a run, and an append-only step log.

-- One row per discovery run.
CREATE TABLE IF NOT EXISTS discovery_runs (
    id SERIAL PRIMARY KEY,
    platform VARCHAR(50) NOT NULL DEFAULT 'dutchie',
    status VARCHAR(20) NOT NULL DEFAULT 'running',  -- running | completed | failed
    started_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    finished_at TIMESTAMPTZ,
    -- NOTE(review): references worker_task_queue while other migrations in this
    -- series use worker_tasks -- confirm which table actually holds the tasks.
    task_id INTEGER REFERENCES worker_task_queue(id),

    -- Aggregate counters for the whole run.
    states_total INTEGER DEFAULT 0,
    states_completed INTEGER DEFAULT 0,
    locations_discovered INTEGER DEFAULT 0,
    locations_promoted INTEGER DEFAULT 0,
    new_store_ids INTEGER[] DEFAULT '{}',

    -- Populated when status = 'failed'.
    error_message TEXT,

    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

-- Per-state progress within a run; one row per (run, state).
CREATE TABLE IF NOT EXISTS discovery_run_states (
    id SERIAL PRIMARY KEY,
    run_id INTEGER NOT NULL REFERENCES discovery_runs(id) ON DELETE CASCADE,
    state_code VARCHAR(2) NOT NULL,
    status VARCHAR(20) NOT NULL DEFAULT 'pending',  -- pending | running | completed | failed
    started_at TIMESTAMPTZ,
    finished_at TIMESTAMPTZ,

    -- Results for this state.
    cities_found INTEGER DEFAULT 0,
    locations_found INTEGER DEFAULT 0,
    locations_upserted INTEGER DEFAULT 0,
    new_dispensary_ids INTEGER[] DEFAULT '{}',

    -- Populated when status = 'failed'.
    error_message TEXT,

    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),

    UNIQUE(run_id, state_code)
);

-- Fine-grained step log for detailed progress tracking.
CREATE TABLE IF NOT EXISTS discovery_run_steps (
    id SERIAL PRIMARY KEY,
    run_id INTEGER NOT NULL REFERENCES discovery_runs(id) ON DELETE CASCADE,
    state_code VARCHAR(2),                          -- NULL for run-level steps
    step_name VARCHAR(100) NOT NULL,
    status VARCHAR(20) NOT NULL DEFAULT 'started',  -- started | completed | failed
    started_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    finished_at TIMESTAMPTZ,

    -- Free-form detail payload; JSONB for schema flexibility.
    details JSONB DEFAULT '{}',

    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

-- Query-path indexes.
CREATE INDEX IF NOT EXISTS idx_discovery_runs_status ON discovery_runs(status);
CREATE INDEX IF NOT EXISTS idx_discovery_runs_platform ON discovery_runs(platform);
CREATE INDEX IF NOT EXISTS idx_discovery_runs_started_at ON discovery_runs(started_at DESC);
CREATE INDEX IF NOT EXISTS idx_discovery_run_states_run_id ON discovery_run_states(run_id);
CREATE INDEX IF NOT EXISTS idx_discovery_run_steps_run_id ON discovery_run_steps(run_id);

-- Most recent run per platform (DISTINCT ON keeps the first row of each
-- platform group under the ORDER BY, i.e. the newest started_at).
CREATE OR REPLACE VIEW v_latest_discovery_runs AS
SELECT DISTINCT ON (platform)
    id,
    platform,
    status,
    started_at,
    finished_at,
    states_total,
    states_completed,
    locations_discovered,
    locations_promoted,
    -- NOTE(review): array_length returns NULL (not 0) for the empty-array
    -- default, so new_stores_count is NULL when no stores were found.
    array_length(new_store_ids, 1) as new_stores_count,
    error_message,
    -- Elapsed seconds; uses NOW() as the end bound while a run is in flight.
    EXTRACT(EPOCH FROM (COALESCE(finished_at, NOW()) - started_at)) as duration_seconds
FROM discovery_runs
ORDER BY platform, started_at DESC;
-- ============================================================================
-- File: backend/migrations/084_dual_transport_preflight.sql (new file, 253 lines)
-- ============================================================================
|
-- Migration 084: Dual Transport Preflight System
-- Workers run both curl and http (Puppeteer) preflights on startup.
-- Tasks can require a specific transport method.

-- ===================================================================
-- PART 1: Preflight columns on worker_registry
-- ===================================================================

-- Status of the curl/axios (proxy-based) preflight.
ALTER TABLE worker_registry
    ADD COLUMN IF NOT EXISTS preflight_curl_status VARCHAR(20) DEFAULT 'pending';

-- Status of the http/Puppeteer (browser-based) preflight.
ALTER TABLE worker_registry
    ADD COLUMN IF NOT EXISTS preflight_http_status VARCHAR(20) DEFAULT 'pending';

-- Completion timestamps per transport.
ALTER TABLE worker_registry
    ADD COLUMN IF NOT EXISTS preflight_curl_at TIMESTAMPTZ;

ALTER TABLE worker_registry
    ADD COLUMN IF NOT EXISTS preflight_http_at TIMESTAMPTZ;

-- Failure details per transport.
ALTER TABLE worker_registry
    ADD COLUMN IF NOT EXISTS preflight_curl_error TEXT;

ALTER TABLE worker_registry
    ADD COLUMN IF NOT EXISTS preflight_http_error TEXT;

-- Response time (ms) of a successful preflight, per transport.
ALTER TABLE worker_registry
    ADD COLUMN IF NOT EXISTS preflight_curl_ms INTEGER;

ALTER TABLE worker_registry
    ADD COLUMN IF NOT EXISTS preflight_http_ms INTEGER;

-- Allowed status values (drop-then-add keeps the migration re-runnable).
ALTER TABLE worker_registry
    DROP CONSTRAINT IF EXISTS valid_preflight_curl_status;

ALTER TABLE worker_registry
    ADD CONSTRAINT valid_preflight_curl_status
    CHECK (preflight_curl_status IN ('pending', 'passed', 'failed', 'skipped'));

ALTER TABLE worker_registry
    DROP CONSTRAINT IF EXISTS valid_preflight_http_status;

ALTER TABLE worker_registry
    ADD CONSTRAINT valid_preflight_http_status
    CHECK (preflight_http_status IN ('pending', 'passed', 'failed', 'skipped'));

-- ===================================================================
-- PART 2: Transport requirement on worker_tasks
-- ===================================================================

-- Transport a task requires:
--   NULL   -- no preference, any worker may claim;
--   'curl' -- curl/axios transport (proxy-based, fast);
--   'http' -- http/Puppeteer transport (browser-based, anti-detect).
ALTER TABLE worker_tasks
    ADD COLUMN IF NOT EXISTS method VARCHAR(10);

ALTER TABLE worker_tasks
    DROP CONSTRAINT IF EXISTS valid_task_method;

ALTER TABLE worker_tasks
    ADD CONSTRAINT valid_task_method
    CHECK (method IS NULL OR method IN ('curl', 'http'));

-- Partial index for method-based claiming of pending tasks.
CREATE INDEX IF NOT EXISTS idx_worker_tasks_method
    ON worker_tasks(method)
    WHERE status = 'pending';

-- Backfill: every existing task requires the browser-based transport.
-- NOTE(review): the WHERE clause matches ALL rows with a NULL method, not only
-- pending ones, despite the original comment saying "pending" -- historical
-- rows are backfilled too, which appears intentional.
UPDATE worker_tasks
SET method = 'http'
WHERE method IS NULL;
|
|
||||||
|
-- ===================================================================
-- PART 3: claim_task with transport-method compatibility
-- ===================================================================

-- Atomically claim one pending task for a worker.
--
-- Parameters:
--   p_role        -- task role the worker serves;
--   p_worker_id   -- claiming worker's id;
--   p_curl_passed -- whether the worker's curl preflight passed;
--   p_http_passed -- whether the worker's http preflight passed.
--
-- Returns the claimed worker_tasks row, or a NULL row when nothing is claimable.
-- FOR UPDATE SKIP LOCKED makes concurrent claimers skip rows another
-- transaction is already locking instead of blocking on them.
CREATE OR REPLACE FUNCTION claim_task(
    p_role VARCHAR(50),
    p_worker_id VARCHAR(100),
    p_curl_passed BOOLEAN DEFAULT TRUE,
    p_http_passed BOOLEAN DEFAULT FALSE
) RETURNS worker_tasks AS $$
DECLARE
    claimed_task worker_tasks;
BEGIN
    UPDATE worker_tasks
    SET
        status = 'claimed',
        worker_id = p_worker_id,
        claimed_at = NOW(),
        updated_at = NOW()
    WHERE id = (
        SELECT id FROM worker_tasks
        WHERE role = p_role
          AND status = 'pending'
          AND (scheduled_for IS NULL OR scheduled_for <= NOW())
          -- Transport compatibility: the worker must have passed the preflight
          -- the task requires (NULL method = any worker qualifies).
          AND (
              method IS NULL
              OR (method = 'curl' AND p_curl_passed = TRUE)
              OR (method = 'http' AND p_http_passed = TRUE)
          )
          -- Per-store exclusion: skip stores that already have an active task.
          -- The inner query filters out NULL dispensary_id so NOT IN cannot
          -- collapse to no-rows on NULLs.
          -- NOTE(review): this exclusion subquery is not itself locked, so two
          -- simultaneous claims of *different* task ids for the same store can
          -- in principle both pass this check -- confirm whether that race is
          -- acceptable.
          AND (dispensary_id IS NULL OR dispensary_id NOT IN (
              SELECT dispensary_id FROM worker_tasks
              WHERE status IN ('claimed', 'running')
                AND dispensary_id IS NOT NULL
          ))
        ORDER BY priority DESC, created_at ASC
        LIMIT 1
        FOR UPDATE SKIP LOCKED
    )
    RETURNING * INTO claimed_task;

    RETURN claimed_task;
END;
$$ LANGUAGE plpgsql;
||||||
|
|
||||||
|
-- ===================================================================
-- PART 4: v_active_workers view
-- ===================================================================

DROP VIEW IF EXISTS v_active_workers;

-- Workers that have not been terminated, with preflight details, a derived
-- health_status, and per-transport capability flags.
CREATE VIEW v_active_workers AS
SELECT
    wr.id,
    wr.worker_id,
    wr.friendly_name,
    wr.role,
    wr.status,
    wr.pod_name,
    wr.hostname,
    wr.started_at,
    wr.last_heartbeat_at,
    wr.last_task_at,
    wr.tasks_completed,
    wr.tasks_failed,
    wr.current_task_id,
    -- Per-transport preflight state.
    wr.preflight_curl_status,
    wr.preflight_http_status,
    wr.preflight_curl_at,
    wr.preflight_http_at,
    wr.preflight_curl_error,
    wr.preflight_http_error,
    wr.preflight_curl_ms,
    wr.preflight_http_ms,
    -- Derived fields.
    EXTRACT(EPOCH FROM (NOW() - wr.last_heartbeat_at)) as seconds_since_heartbeat,
    CASE
        WHEN wr.status = 'offline' THEN 'offline'
        WHEN wr.last_heartbeat_at < NOW() - INTERVAL '2 minutes' THEN 'stale'
        WHEN wr.current_task_id IS NOT NULL THEN 'busy'
        ELSE 'ready'
    END as health_status,
    -- Capability flags: may this worker claim curl/http tasks?
    (wr.preflight_curl_status = 'passed') as can_curl,
    (wr.preflight_http_status = 'passed') as can_http
FROM worker_registry wr
WHERE wr.status != 'terminated'
ORDER BY wr.status = 'active' DESC, wr.last_heartbeat_at DESC;

-- ===================================================================
-- PART 5: Task queue view including transport method
-- ===================================================================

DROP VIEW IF EXISTS v_task_history;

-- Task history joined to dispensary names, newest first.
CREATE VIEW v_task_history AS
SELECT
    t.id,
    t.role,
    t.dispensary_id,
    d.name as dispensary_name,
    t.platform,
    t.status,
    t.priority,
    t.method,
    t.worker_id,
    t.scheduled_for,
    t.claimed_at,
    t.started_at,
    t.completed_at,
    t.error_message,
    t.retry_count,
    t.created_at,
    -- NULL until both started_at and completed_at are set.
    EXTRACT(EPOCH FROM (t.completed_at - t.started_at)) as duration_sec
FROM worker_tasks t
LEFT JOIN dispensaries d ON d.id = t.dispensary_id
ORDER BY t.created_at DESC;
||||||
|
|
||||||
|
-- ===================================================================
-- PART 6: Helper to record a worker's preflight result
-- ===================================================================

-- Record the outcome of one transport's preflight for a worker.
-- p_transport selects which column family to write; any value other than
-- 'curl' or 'http' is silently ignored.
-- NOTE(review): migration 085 replaces this function with a wider signature
-- (adds ip and fingerprint parameters).
CREATE OR REPLACE FUNCTION update_worker_preflight(
    p_worker_id VARCHAR(100),
    p_transport VARCHAR(10),          -- 'curl' or 'http'
    p_status VARCHAR(20),             -- 'passed', 'failed', 'skipped'
    p_response_ms INTEGER DEFAULT NULL,
    p_error TEXT DEFAULT NULL
) RETURNS VOID AS $$
BEGIN
    IF p_transport = 'curl' THEN
        UPDATE worker_registry
        SET
            preflight_curl_status = p_status,
            preflight_curl_at = NOW(),
            preflight_curl_ms = p_response_ms,
            preflight_curl_error = p_error,
            updated_at = NOW()
        WHERE worker_id = p_worker_id;
    ELSIF p_transport = 'http' THEN
        UPDATE worker_registry
        SET
            preflight_http_status = p_status,
            preflight_http_at = NOW(),
            preflight_http_ms = p_response_ms,
            preflight_http_error = p_error,
            updated_at = NOW()
        WHERE worker_id = p_worker_id;
    END IF;
END;
$$ LANGUAGE plpgsql;

-- ===================================================================
-- Column and function documentation
-- ===================================================================

COMMENT ON COLUMN worker_registry.preflight_curl_status IS 'Status of curl/axios preflight: pending, passed, failed, skipped';
COMMENT ON COLUMN worker_registry.preflight_http_status IS 'Status of http/Puppeteer preflight: pending, passed, failed, skipped';
COMMENT ON COLUMN worker_registry.preflight_curl_at IS 'When curl preflight completed';
COMMENT ON COLUMN worker_registry.preflight_http_at IS 'When http preflight completed';
COMMENT ON COLUMN worker_registry.preflight_curl_error IS 'Error message if curl preflight failed';
COMMENT ON COLUMN worker_registry.preflight_http_error IS 'Error message if http preflight failed';
COMMENT ON COLUMN worker_registry.preflight_curl_ms IS 'Response time of successful curl preflight (ms)';
COMMENT ON COLUMN worker_registry.preflight_http_ms IS 'Response time of successful http preflight (ms)';

COMMENT ON COLUMN worker_tasks.method IS 'Transport method required: NULL=any, curl=proxy-based, http=browser-based';

COMMENT ON FUNCTION claim_task IS 'Atomically claim a task, respecting method requirements and per-store locking';
COMMENT ON FUNCTION update_worker_preflight IS 'Update a workers preflight status for a given transport';
-- ============================================================================
-- File: backend/migrations/085_preflight_ip_fingerprint.sql (new file, 168 lines)
-- ============================================================================
|
-- Migration 085: IP and fingerprint columns for preflight reporting
-- These columns were missing from migration 084.

-- ===================================================================
-- PART 1: IP addresses observed during preflight
-- ===================================================================

-- VARCHAR(45) fits the longest textual IPv6 form (including IPv4-mapped).
ALTER TABLE worker_registry
    ADD COLUMN IF NOT EXISTS curl_ip VARCHAR(45);

ALTER TABLE worker_registry
    ADD COLUMN IF NOT EXISTS http_ip VARCHAR(45);

-- ===================================================================
-- PART 2: Browser fingerprint captured by the Puppeteer preflight
-- ===================================================================

ALTER TABLE worker_registry
    ADD COLUMN IF NOT EXISTS fingerprint_data JSONB;

-- ===================================================================
-- PART 3: Combined preflight status / timestamp
-- ===================================================================

-- Overall preflight status, computed from the two per-transport statuses:
--   'pending' -- neither transport tested yet;
--   'passed'  -- both transports passed;
--   'partial' -- exactly one transport passed;
--   'failed'  -- no transport passed, at least one failed.
ALTER TABLE worker_registry
    ADD COLUMN IF NOT EXISTS preflight_status VARCHAR(20) DEFAULT 'pending';

-- Timestamp of the most recent preflight completion (either transport).
ALTER TABLE worker_registry
    ADD COLUMN IF NOT EXISTS preflight_at TIMESTAMPTZ;
|
|
||||||
|
-- ===================================================================
-- PART 4: Record a preflight result and recompute overall status
-- ===================================================================

-- FIX: migration 084 created update_worker_preflight(varchar, varchar, varchar,
-- integer, text). CREATE OR REPLACE with a *different* parameter list does not
-- replace a function in PostgreSQL -- it creates an overload. Because both
-- versions have defaulted trailing parameters, a short call such as
--   SELECT update_worker_preflight('w1', 'curl', 'passed');
-- would match both and fail with "function update_worker_preflight(...) is not
-- unique". Drop the old signature before creating the new one.
DROP FUNCTION IF EXISTS update_worker_preflight(VARCHAR, VARCHAR, VARCHAR, INTEGER, TEXT);

-- Record one transport's preflight outcome for a worker, then recompute the
-- combined preflight_status / preflight_at.
--
-- Parameters:
--   p_worker_id   -- worker to update;
--   p_transport   -- 'curl' or 'http' (anything else: no per-transport write,
--                    but the overall status is still recomputed);
--   p_status      -- 'passed', 'failed', or 'skipped';
--   p_ip          -- egress IP observed during the preflight;
--   p_response_ms -- preflight response time in milliseconds;
--   p_error       -- error message for a failed preflight;
--   p_fingerprint -- browser fingerprint (http transport only); COALESCE keeps
--                    the previously stored fingerprint when NULL is passed.
CREATE OR REPLACE FUNCTION update_worker_preflight(
    p_worker_id VARCHAR(100),
    p_transport VARCHAR(10),
    p_status VARCHAR(20),
    p_ip VARCHAR(45) DEFAULT NULL,
    p_response_ms INTEGER DEFAULT NULL,
    p_error TEXT DEFAULT NULL,
    p_fingerprint JSONB DEFAULT NULL
) RETURNS VOID AS $$
DECLARE
    v_curl_status VARCHAR(20);
    v_http_status VARCHAR(20);
    v_overall_status VARCHAR(20);
BEGIN
    IF p_transport = 'curl' THEN
        UPDATE worker_registry
        SET
            preflight_curl_status = p_status,
            preflight_curl_at = NOW(),
            preflight_curl_ms = p_response_ms,
            preflight_curl_error = p_error,
            curl_ip = p_ip,
            updated_at = NOW()
        WHERE worker_id = p_worker_id;
    ELSIF p_transport = 'http' THEN
        UPDATE worker_registry
        SET
            preflight_http_status = p_status,
            preflight_http_at = NOW(),
            preflight_http_ms = p_response_ms,
            preflight_http_error = p_error,
            http_ip = p_ip,
            -- Keep the existing fingerprint when none is supplied.
            fingerprint_data = COALESCE(p_fingerprint, fingerprint_data),
            updated_at = NOW()
        WHERE worker_id = p_worker_id;
    END IF;

    -- Re-read both per-transport statuses (including the write above).
    SELECT preflight_curl_status, preflight_http_status
    INTO v_curl_status, v_http_status
    FROM worker_registry
    WHERE worker_id = p_worker_id;

    -- Fold into the overall status.
    IF v_curl_status = 'passed' AND v_http_status = 'passed' THEN
        v_overall_status := 'passed';
    ELSIF v_curl_status = 'passed' OR v_http_status = 'passed' THEN
        v_overall_status := 'partial';
    ELSIF v_curl_status = 'failed' OR v_http_status = 'failed' THEN
        v_overall_status := 'failed';
    ELSE
        v_overall_status := 'pending';
    END IF;

    UPDATE worker_registry
    SET
        preflight_status = v_overall_status,
        preflight_at = NOW()
    WHERE worker_id = p_worker_id;
END;
$$ LANGUAGE plpgsql;
||||||
|
|
||||||
|
-- ===================================================================
-- PART 5: v_active_workers view (extended with IPs, fingerprint,
-- and combined preflight status)
-- ===================================================================

DROP VIEW IF EXISTS v_active_workers;

CREATE VIEW v_active_workers AS
SELECT
    wr.id,
    wr.worker_id,
    wr.friendly_name,
    wr.role,
    wr.status,
    wr.pod_name,
    wr.hostname,
    wr.started_at,
    wr.last_heartbeat_at,
    wr.last_task_at,
    wr.tasks_completed,
    wr.tasks_failed,
    wr.current_task_id,
    -- Egress IPs observed by each preflight transport.
    wr.curl_ip,
    wr.http_ip,
    -- Combined preflight state.
    wr.preflight_status,
    wr.preflight_at,
    -- Per-transport preflight details.
    wr.preflight_curl_status,
    wr.preflight_http_status,
    wr.preflight_curl_at,
    wr.preflight_http_at,
    wr.preflight_curl_error,
    wr.preflight_http_error,
    wr.preflight_curl_ms,
    wr.preflight_http_ms,
    -- Browser fingerprint from the Puppeteer preflight.
    wr.fingerprint_data,
    -- Derived fields.
    EXTRACT(EPOCH FROM (NOW() - wr.last_heartbeat_at)) as seconds_since_heartbeat,
    CASE
        WHEN wr.status = 'offline' THEN 'offline'
        WHEN wr.last_heartbeat_at < NOW() - INTERVAL '2 minutes' THEN 'stale'
        WHEN wr.current_task_id IS NOT NULL THEN 'busy'
        ELSE 'ready'
    END as health_status,
    -- Capability flags: may this worker claim curl/http tasks?
    (wr.preflight_curl_status = 'passed') as can_curl,
    (wr.preflight_http_status = 'passed') as can_http
FROM worker_registry wr
WHERE wr.status != 'terminated'
ORDER BY wr.status = 'active' DESC, wr.last_heartbeat_at DESC;

-- ===================================================================
-- Column documentation
-- ===================================================================

COMMENT ON COLUMN worker_registry.curl_ip IS 'IP address detected during curl/axios preflight';
COMMENT ON COLUMN worker_registry.http_ip IS 'IP address detected during Puppeteer preflight';
COMMENT ON COLUMN worker_registry.fingerprint_data IS 'Browser fingerprint captured during Puppeteer preflight';
COMMENT ON COLUMN worker_registry.preflight_status IS 'Overall preflight status: pending, passed, partial, failed';
COMMENT ON COLUMN worker_registry.preflight_at IS 'Most recent preflight completion timestamp';
-- ============================================================================
-- File: backend/migrations/085_trusted_origins.sql (new file, 59 lines)
-- NOTE: two migration files share the number 085 (see 085_preflight_ip_fingerprint.sql).
-- ============================================================================
|
-- Migration 085: Trusted Origins Management
-- Lets admins manage trusted IPs and domains via the UI instead of hardcoding them.

-- Origins (IPs, domains, regex patterns) that bypass API key auth.
CREATE TABLE IF NOT EXISTS trusted_origins (
    id SERIAL PRIMARY KEY,

    -- What kind of value origin_value holds.
    origin_type VARCHAR(20) NOT NULL CHECK (origin_type IN ('ip', 'domain', 'pattern')),

    -- The origin itself:
    --   ip:      '127.0.0.1', '::1', '192.168.1.0/24'
    --   domain:  'cannaiq.co', 'findadispo.com'
    --   pattern: '^https://.*\.cannabrands\.app$' (regex)
    origin_value VARCHAR(255) NOT NULL,

    -- Free-text note for admins.
    description TEXT,

    -- Soft enable/disable without deleting the row.
    active BOOLEAN DEFAULT true,

    -- Audit trail.
    created_at TIMESTAMPTZ DEFAULT NOW(),
    created_by INTEGER REFERENCES users(id),
    updated_at TIMESTAMPTZ DEFAULT NOW(),

    UNIQUE(origin_type, origin_value)
);

-- Lookup indexes; partial index covers the hot "active only" path.
CREATE INDEX IF NOT EXISTS idx_trusted_origins_active ON trusted_origins(active) WHERE active = true;
CREATE INDEX IF NOT EXISTS idx_trusted_origins_type ON trusted_origins(origin_type, active);

-- Seed with the values previously hardcoded in the application.
-- ON CONFLICT keeps re-runs idempotent.
INSERT INTO trusted_origins (origin_type, origin_value, description) VALUES
    -- Localhost IPs.
    ('ip', '127.0.0.1', 'Localhost IPv4'),
    ('ip', '::1', 'Localhost IPv6'),
    ('ip', '::ffff:127.0.0.1', 'Localhost IPv4-mapped IPv6'),

    -- Production domains.
    ('domain', 'cannaiq.co', 'CannaiQ production'),
    ('domain', 'www.cannaiq.co', 'CannaiQ production (www)'),
    ('domain', 'findadispo.com', 'FindADispo production'),
    ('domain', 'www.findadispo.com', 'FindADispo production (www)'),
    ('domain', 'findagram.co', 'Findagram production'),
    ('domain', 'www.findagram.co', 'Findagram production (www)'),
    ('domain', 'localhost:3010', 'Local backend dev'),
    ('domain', 'localhost:8080', 'Local admin dev'),
    ('domain', 'localhost:5173', 'Local Vite dev'),

    -- Regex patterns.
    ('pattern', '^https://.*\.cannabrands\.app$', 'All cannabrands.app subdomains'),
    ('pattern', '^https://.*\.cannaiq\.co$', 'All cannaiq.co subdomains')
ON CONFLICT (origin_type, origin_value) DO NOTHING;

COMMENT ON TABLE trusted_origins IS 'IPs and domains that bypass API key authentication. Managed via /admin.';
-- ============================================================================
-- File: backend/migrations/086_proxy_url_column.sql (new file, 10 lines)
-- ============================================================================
|
-- Migration 086: Add proxy_url column for alternative URL formats
-- Some proxy providers use non-standard URL formats (e.g., host:port:user:pass)
-- This column allows storing the raw URL directly

-- If set, the raw URL is used verbatim instead of constructing one from parts.
ALTER TABLE proxies ADD COLUMN IF NOT EXISTS proxy_url TEXT;

COMMENT ON COLUMN proxies.proxy_url IS 'Raw proxy URL (if provider uses non-standard format). Takes precedence over constructed URL from host/port/user/pass.';
|
||||||
30
backend/migrations/088_discovery_payloads.sql
Normal file
30
backend/migrations/088_discovery_payloads.sql
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
-- Migration 088: Extend raw_crawl_payloads for discovery payloads
--
-- Store discovery crawls return raw dispensary objects; this migration lets
-- those payloads be archived alongside product payloads for historical analysis.

-- Distinguish product crawls from discovery crawls.
ALTER TABLE raw_crawl_payloads
    ADD COLUMN IF NOT EXISTS payload_type VARCHAR(32) NOT NULL DEFAULT 'product';

-- State scope of a discovery payload (NULL for product payloads).
ALTER TABLE raw_crawl_payloads
    ADD COLUMN IF NOT EXISTS state_code VARCHAR(10);

-- Store count for discovery payloads (counterpart of product_count).
ALTER TABLE raw_crawl_payloads
    ADD COLUMN IF NOT EXISTS store_count INTEGER;

-- Discovery payloads are not tied to a single dispensary.
ALTER TABLE raw_crawl_payloads
    ALTER COLUMN dispensary_id DROP NOT NULL;

-- Partial index for discovery-payload queries.
-- NOTE(review): the predicate already pins payload_type = 'store_discovery', so
-- indexing state_code alone would suffice — kept as-is to preserve the schema.
CREATE INDEX IF NOT EXISTS idx_raw_crawl_payloads_type_state
    ON raw_crawl_payloads(payload_type, state_code)
    WHERE payload_type = 'store_discovery';

COMMENT ON COLUMN raw_crawl_payloads.payload_type IS 'Type: product (default), store_discovery';
COMMENT ON COLUMN raw_crawl_payloads.state_code IS 'State code for discovery payloads (e.g., AZ, MI)';
COMMENT ON COLUMN raw_crawl_payloads.store_count IS 'Number of stores in discovery payload';
|
||||||
105
backend/migrations/089_immutable_schedules.sql
Normal file
105
backend/migrations/089_immutable_schedules.sql
Normal file
@@ -0,0 +1,105 @@
|
|||||||
|
-- Migration 089: Immutable Schedules with Per-State Product Discovery
--
-- 1. is_immutable column: schedules can be edited but not deleted
-- 2. method column: all tasks use 'http' (Puppeteer transport)
-- 3. Store discovery runs weekly (168h)
-- 4. One product_discovery schedule per state (4h default)
-- 5. Old payload_fetch schedules are removed

-- =====================================================
-- 1) New columns on task_schedules
-- =====================================================
ALTER TABLE task_schedules
    ADD COLUMN IF NOT EXISTS is_immutable BOOLEAN DEFAULT FALSE;

ALTER TABLE task_schedules
    ADD COLUMN IF NOT EXISTS method VARCHAR(10) DEFAULT 'http';

-- =====================================================
-- 2) Store discovery: weekly and immutable
-- =====================================================
UPDATE task_schedules
SET interval_hours = 168,  -- 7 days
    is_immutable   = TRUE,
    method         = 'http',
    description    = 'Discover new Dutchie stores weekly (HTTP transport)'
WHERE name IN ('store_discovery_dutchie', 'Store Discovery');

-- Upsert so the schedule also exists on fresh databases.
INSERT INTO task_schedules (name, role, interval_hours, priority, description, is_immutable, method, platform, next_run_at)
VALUES ('Store Discovery', 'store_discovery', 168, 5, 'Discover new Dutchie stores weekly (HTTP transport)', TRUE, 'http', 'dutchie', NOW())
ON CONFLICT (name) DO UPDATE SET
    interval_hours = 168,
    is_immutable   = TRUE,
    method         = 'http',
    description    = 'Discover new Dutchie stores weekly (HTTP transport)';

-- =====================================================
-- 3) Drop obsolete payload_fetch / product_refresh schedules
-- =====================================================
DELETE FROM task_schedules WHERE name IN ('payload_fetch_all', 'product_refresh_all');

-- =====================================================
-- 4) Per-state product_discovery schedules
-- =====================================================
-- One schedule per state that currently has crawl-enabled dispensaries.
INSERT INTO task_schedules (name, role, state_code, interval_hours, priority, description, is_immutable, method, enabled, next_run_at)
SELECT
    'product_discovery_' || lower(s.code) AS name,
    'product_discovery' AS role,
    s.code AS state_code,
    4 AS interval_hours,   -- 4 hours default, editable
    10 AS priority,
    'Product discovery for ' || s.name || ' dispensaries (HTTP transport)' AS description,
    TRUE AS is_immutable,  -- can edit but not delete
    'http' AS method,
    COALESCE(s.is_active, FALSE) AS enabled,
    -- Stagger start times: each state starts 5 minutes after the previous.
    NOW() + (ROW_NUMBER() OVER (ORDER BY s.code) * INTERVAL '5 minutes') AS next_run_at
FROM states s
WHERE EXISTS (
    SELECT 1
    FROM dispensaries d
    WHERE d.state_id = s.id
      AND d.crawl_enabled = true
)
ON CONFLICT (name) DO UPDATE SET
    is_immutable = TRUE,
    method       = 'http',
    description  = EXCLUDED.description;

-- Pre-create (disabled) schedules for states whose stores may be discovered later.
INSERT INTO task_schedules (name, role, state_code, interval_hours, priority, description, is_immutable, method, enabled, next_run_at)
SELECT
    'product_discovery_' || lower(s.code),
    'product_discovery',
    s.code,
    4,
    10,
    'Product discovery for ' || s.name || ' dispensaries (HTTP transport)',
    TRUE,
    'http',
    FALSE,  -- disabled until stores exist
    NOW() + INTERVAL '1 hour'
FROM states s
WHERE NOT EXISTS (
    SELECT 1 FROM task_schedules ts
    WHERE ts.name = 'product_discovery_' || lower(s.code)
)
ON CONFLICT (name) DO NOTHING;

-- =====================================================
-- 5) Make analytics_refresh immutable
-- =====================================================
UPDATE task_schedules
SET is_immutable = TRUE, method = 'http'
WHERE name = 'analytics_refresh';

-- =====================================================
-- 6) Index for schedule lookups by state
-- =====================================================
CREATE INDEX IF NOT EXISTS idx_task_schedules_state_code
    ON task_schedules(state_code)
    WHERE state_code IS NOT NULL;

COMMENT ON COLUMN task_schedules.is_immutable IS 'If TRUE, schedule cannot be deleted (only edited)';
COMMENT ON COLUMN task_schedules.method IS 'Transport method: http (Puppeteer/browser) or curl (axios)';
|
||||||
66
backend/migrations/090_modification_tracking.sql
Normal file
66
backend/migrations/090_modification_tracking.sql
Normal file
@@ -0,0 +1,66 @@
|
|||||||
|
-- Migration 090: Add modification tracking columns
--
-- Records when a row was last touched by a worker task and by which task,
-- for debugging, auditing, and data-freshness checks.
--
--   last_modified_at      - when the record was last modified by a task
--   last_modified_by_task - task role that modified it (e.g., 'product_refresh')
--   last_modified_task_id - specific task ID that modified it

-- ============================================================
-- dispensaries table
-- ============================================================
ALTER TABLE dispensaries
    ADD COLUMN IF NOT EXISTS last_modified_at TIMESTAMPTZ;

ALTER TABLE dispensaries
    ADD COLUMN IF NOT EXISTS last_modified_by_task VARCHAR(50);

-- NOTE(review): plain INTEGER, no FK to worker_tasks — presumably task rows
-- get pruned; confirm before adding referential integrity here.
ALTER TABLE dispensaries
    ADD COLUMN IF NOT EXISTS last_modified_task_id INTEGER;

-- Recently-modified record lookups.
CREATE INDEX IF NOT EXISTS idx_dispensaries_last_modified
    ON dispensaries(last_modified_at DESC)
    WHERE last_modified_at IS NOT NULL;

-- Lookups by modifying task role.
CREATE INDEX IF NOT EXISTS idx_dispensaries_modified_by_task
    ON dispensaries(last_modified_by_task)
    WHERE last_modified_by_task IS NOT NULL;

COMMENT ON COLUMN dispensaries.last_modified_at IS 'Timestamp when this record was last modified by a task';
COMMENT ON COLUMN dispensaries.last_modified_by_task IS 'Task role that last modified this record (e.g., store_discovery_state, entry_point_discovery)';
COMMENT ON COLUMN dispensaries.last_modified_task_id IS 'ID of the worker_tasks record that last modified this';

-- ============================================================
-- store_products table
-- ============================================================
ALTER TABLE store_products
    ADD COLUMN IF NOT EXISTS last_modified_at TIMESTAMPTZ;

ALTER TABLE store_products
    ADD COLUMN IF NOT EXISTS last_modified_by_task VARCHAR(50);

ALTER TABLE store_products
    ADD COLUMN IF NOT EXISTS last_modified_task_id INTEGER;

-- Recently-modified product lookups.
CREATE INDEX IF NOT EXISTS idx_store_products_last_modified
    ON store_products(last_modified_at DESC)
    WHERE last_modified_at IS NOT NULL;

-- Lookups by modifying task role.
CREATE INDEX IF NOT EXISTS idx_store_products_modified_by_task
    ON store_products(last_modified_by_task)
    WHERE last_modified_by_task IS NOT NULL;

-- Per-dispensary recency scans (products modified by a specific run).
CREATE INDEX IF NOT EXISTS idx_store_products_task_modified
    ON store_products(dispensary_id, last_modified_at DESC)
    WHERE last_modified_at IS NOT NULL;

COMMENT ON COLUMN store_products.last_modified_at IS 'Timestamp when this record was last modified by a task';
COMMENT ON COLUMN store_products.last_modified_by_task IS 'Task role that last modified this record (e.g., product_refresh, product_discovery)';
COMMENT ON COLUMN store_products.last_modified_task_id IS 'ID of the worker_tasks record that last modified this';
|
||||||
26
backend/migrations/091_store_discovery_tracking.sql
Normal file
26
backend/migrations/091_store_discovery_tracking.sql
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
-- Migration 091: Add store discovery tracking columns
-- Per auto-healing scheme (2025-12-12):
--   * track when store_discovery last updated each dispensary
--   * track when the last product payload was saved

-- When the store_discovery task last updated this record.
ALTER TABLE dispensaries
    ADD COLUMN IF NOT EXISTS last_store_discovery_at TIMESTAMPTZ;

-- When the last product payload was saved
-- (complements last_fetch_at, which tracks API fetch time).
ALTER TABLE dispensaries
    ADD COLUMN IF NOT EXISTS last_payload_at TIMESTAMPTZ;

-- Find crawl-enabled dispensaries with stale discovery data.
CREATE INDEX IF NOT EXISTS idx_dispensaries_store_discovery_at
    ON dispensaries (last_store_discovery_at DESC NULLS LAST)
    WHERE crawl_enabled = true;

-- Find crawl-enabled dispensaries lacking recent payloads.
CREATE INDEX IF NOT EXISTS idx_dispensaries_payload_at
    ON dispensaries (last_payload_at DESC NULLS LAST)
    WHERE crawl_enabled = true;

COMMENT ON COLUMN dispensaries.last_store_discovery_at IS 'When store_discovery task last updated this record';
COMMENT ON COLUMN dispensaries.last_payload_at IS 'When last product payload was saved for this dispensary';
|
||||||
30
backend/migrations/092_fix_trulieve_urls.sql
Normal file
30
backend/migrations/092_fix_trulieve_urls.sql
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
-- Fix 3 Trulieve/Harvest stores with incorrect menu URLs.
-- These records have NULL or mismatched platform_dispensary_id, so
-- store_discovery's ON CONFLICT path can't update them automatically.

-- One set-based update instead of three single-row statements.
UPDATE dispensaries AS d
SET menu_url   = fix.menu_url,
    updated_at = NOW()
FROM (VALUES
    (224, 'https://dutchie.com/dispensary/svaccha-llc-nirvana-center-apache-junction'),
    (76,  'https://dutchie.com/dispensary/trulieve-of-phoenix-tatum'),
    (403, 'https://dutchie.com/dispensary/harvest-of-havasu')
) AS fix(id, menu_url)
WHERE d.id = fix.id;

-- Queue entry_point_discovery tasks to resolve their platform_dispensary_id.
-- method='http' ensures only workers that passed http preflight can claim these.
INSERT INTO worker_tasks (role, dispensary_id, priority, scheduled_for, method)
VALUES
    ('entry_point_discovery', 224, 5, NOW(), 'http'),
    ('entry_point_discovery', 76, 5, NOW(), 'http'),
    ('entry_point_discovery', 403, 5, NOW(), 'http')
ON CONFLICT DO NOTHING;
|
||||||
35
backend/migrations/092_store_intelligence_cache.sql
Normal file
35
backend/migrations/092_store_intelligence_cache.sql
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
-- Migration 092: Store Intelligence Cache
-- Pre-computed store intelligence data, refreshed by the analytics_refresh task.
-- Avoids costly aggregation queries on the /intelligence/stores endpoint.

CREATE TABLE IF NOT EXISTS store_intelligence_cache (
    dispensary_id   INTEGER PRIMARY KEY REFERENCES dispensaries(id) ON DELETE CASCADE,

    -- Basic counts
    sku_count       INTEGER NOT NULL DEFAULT 0,
    brand_count     INTEGER NOT NULL DEFAULT 0,
    snapshot_count  INTEGER NOT NULL DEFAULT 0,

    -- Pricing
    avg_price_rec   NUMERIC(10,2),
    avg_price_med   NUMERIC(10,2),
    min_price       NUMERIC(10,2),
    max_price       NUMERIC(10,2),

    -- Category breakdown (JSONB for flexibility)
    category_counts JSONB DEFAULT '{}',

    -- Timestamps
    last_crawl_at   TIMESTAMPTZ,
    last_refresh_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),

    -- Metadata
    created_at      TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

-- Fast "most recently refreshed" lookups.
CREATE INDEX IF NOT EXISTS idx_store_intelligence_cache_refresh
    ON store_intelligence_cache (last_refresh_at DESC);

COMMENT ON TABLE store_intelligence_cache IS 'Pre-computed store intelligence metrics, refreshed by analytics_refresh task';
COMMENT ON COLUMN store_intelligence_cache.category_counts IS 'JSON object mapping category_raw to product count';
|
||||||
43
backend/migrations/093_fix_mv_state_metrics.sql
Normal file
43
backend/migrations/093_fix_mv_state_metrics.sql
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
-- Migration: 093_fix_mv_state_metrics.sql
-- Purpose: Fix mv_state_metrics to use brand_name_raw and show correct store counts
-- Fixes:
--   1. unique_brands previously used brand_id (often NULL); now uses brand_name_raw
--   2. adds out_of_stock_products column
--   3. dispensary_count is now correctly named

-- Rebuild the materialized view with the corrected definition.
DROP MATERIALIZED VIEW IF EXISTS mv_state_metrics;

CREATE MATERIALIZED VIEW mv_state_metrics AS
SELECT
    d.state,
    s.name AS state_name,
    COUNT(DISTINCT d.id) AS dispensary_count,
    COUNT(DISTINCT CASE WHEN d.menu_type = 'dutchie' THEN d.id END) AS dutchie_stores,
    COUNT(DISTINCT CASE WHEN d.crawl_enabled = true THEN d.id END) AS active_stores,
    COUNT(sp.id) AS total_products,
    -- A NULL is_in_stock flag is counted as in-stock (COALESCE default true).
    COUNT(CASE WHEN COALESCE(sp.is_in_stock, true) THEN sp.id END) AS in_stock_products,
    COUNT(CASE WHEN sp.is_in_stock = false THEN sp.id END) AS out_of_stock_products,
    COUNT(CASE WHEN sp.is_on_special THEN sp.id END) AS on_special_products,
    COUNT(DISTINCT sp.brand_name_raw)
        FILTER (WHERE sp.brand_name_raw IS NOT NULL AND sp.brand_name_raw != '') AS unique_brands,
    COUNT(DISTINCT sp.category_raw)
        FILTER (WHERE sp.category_raw IS NOT NULL) AS unique_categories,
    ROUND(AVG(sp.price_rec) FILTER (WHERE sp.price_rec > 0)::NUMERIC, 2) AS avg_price_rec,
    MIN(sp.price_rec) FILTER (WHERE sp.price_rec > 0) AS min_price_rec,
    MAX(sp.price_rec) FILTER (WHERE sp.price_rec > 0) AS max_price_rec,
    NOW() AS refreshed_at
FROM dispensaries d
LEFT JOIN states s ON d.state = s.code
LEFT JOIN store_products sp ON d.id = sp.dispensary_id
WHERE d.state IS NOT NULL
GROUP BY d.state, s.name;

-- Unique index required for REFRESH MATERIALIZED VIEW CONCURRENTLY.
CREATE UNIQUE INDEX idx_mv_state_metrics_state ON mv_state_metrics(state);

-- Refresh helper invoked by the analytics_refresh task.
CREATE OR REPLACE FUNCTION refresh_state_metrics()
RETURNS void AS $$
BEGIN
    REFRESH MATERIALIZED VIEW CONCURRENTLY mv_state_metrics;
END;
$$ LANGUAGE plpgsql;
|
||||||
516
backend/migrations/094_import_evomi_proxies.sql
Normal file
516
backend/migrations/094_import_evomi_proxies.sql
Normal file
@@ -0,0 +1,516 @@
|
|||||||
|
-- Migration: Import 500 Evomi residential proxies
-- These are sticky-session rotating proxies where the password contains the
-- session ID. active is set to false - run Test All to verify and activate.
-- NOTE(review): usernames/passwords are stored in plaintext both in the
-- proxies table and in this migration file; confirm that is acceptable for
-- this deployment before shipping.

-- First, drop the old unique constraint that doesn't account for username/password.
ALTER TABLE proxies DROP CONSTRAINT IF EXISTS proxies_host_port_protocol_key;

-- Add a unique constraint that includes username and password.
-- This allows multiple entries for the same host:port with different
-- credentials (one row per session).
-- Drop-if-exists first so re-applying this migration does not fail on a
-- duplicate constraint name (consistent with the IF EXISTS style above).
ALTER TABLE proxies DROP CONSTRAINT IF EXISTS proxies_host_port_protocol_username_password_key;
ALTER TABLE proxies ADD CONSTRAINT proxies_host_port_protocol_username_password_key
    UNIQUE (host, port, protocol, username, password);
|
||||||
|
|
||||||
|
-- Now insert all 500 proxies
|
||||||
|
INSERT INTO proxies (host, port, protocol, username, password, active, max_connections)
|
||||||
|
VALUES
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4XRRPF1UQ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5UNGX7N7K', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-9PSKYP1GU', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-GZBKKYL2S', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YHJHM0XZU', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ESDYQ34CJ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-GAXUMFKQI', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2FF66K4CI', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-SUYM0R49B', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-A8VHZMEFP', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WNRLH6NXR', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-SPSB3IUX6', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-85N76UU5Q', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-189P3LH2F', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-47DQOAGWY', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-IBT0QO7M2', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-UPXOUOH8X', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-BFQ1PH75D', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KNTFKRY1J', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5L8IG6DZX', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-9YE13X0BA', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-6KBHCHF0I', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CETHHFHZ6', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-A06J8ST3I', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YFS93P1YR', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RB74B3R6C', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2JW27O3EU', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KCUX84BL0', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1A2KSG6HO', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4QW8ILV0E', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-0Q09GH2VL', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-16BRXBCYC', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-9W02B3R4L', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CVAEH76YT', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CATOG0Q5I', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-F81625L74', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DO4AVTPK4', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-SBZPXORD5', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JA1AWOX03', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-0FUJTRSYT', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CM1R2RSTB', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-EHPJZCK1S', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ZYLKORNAF', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-05A8BUD25', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RHM1Q6O4M', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ES5VPCE6Z', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-P0JEGLP4O', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-OC4AX88D0', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3BN54IEBV', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ABSC7S550', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-LNIJU6R2V', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-OYGQPPCOV', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-32YBOHQWR', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-7KGEMK4SL', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FAW8T2EBW', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-GPV69KI9T', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JPBHSN8M2', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VZ1JQOF15', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-7DJXXPK1E', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JXKQ7JVZ1', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-88Q5UQX3B', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HAI5K0JFO', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-65SUKG0QH', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1XFJETX1F', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-7ZNUCVCBW', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-O1DCK15LA', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WLTEA65WB', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KCHAFNK2P', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-6ODSZ6CUT', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-SZ8R2EFH4', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-9EPPYQREC', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MPCBES7UI', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FCCPL0XWZ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-GJ23UYEGI', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RQT80689I', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-TDQO2AP5E', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-D5Q5SEUEO', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DZN4ZTENM', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4HVQ33VK9', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-F1HJ7GPHA', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RM708QD2Y', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-K36N27GM5', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-O73TS0DAE', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-54QXRWEA8', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1P6LP0365', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WMZ2ST34E', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-175UYF58T', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-W0HTK6F28', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-D5275CTIM', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-IH2IWVZOH', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-C4VFW7GSA', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-O9XGULSNA', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-PJ1W1P5L9', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MQQU30KPC', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-BNPIBZTYV', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-7BNRCH922', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5AZLU117B', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3PPJ49VJC', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FMC8CQO74', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VCHW23CXJ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1S4749PCB', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-0T9DJFZPK', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-L0RMV65W3', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FZ1ZZUQNA', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-6IFJD23DI', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ZKUEP5XM0', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Z8KU62CLT', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-LO77J78X1', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-27FBKYRJ4', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-0TDQTESGW', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-IMKI89WQ1', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ANS65MIJS', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-O3T2OTT0Y', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MWW6Z1QVM', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-TT47MX0BB', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-59CFKTM14', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DOD61TVZN', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RH9Y298WS', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-X98AATJ7B', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-C3UMES1W8', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8O3J7G3PT', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3K4OH78OJ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-N4A3JMVL1', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HK1SRLAC9', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Y9VLJJXVU', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KTTH7R0EC', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JKVX01E8T', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HW2VPAHJO', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-7WZ9UHBH8', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JTKFK0CP7', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-G3F27NXG5', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-K7I2JWYSP', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CTUU8UQ0T', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ISHMAP6RQ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-LVWNZ1LHP', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-N5CQ1YG2Z', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XL2XY2SLZ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-UCRZVFIV1', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VLGQFYNEL', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YPCDM9O5Y', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-R6VA2S25E', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4W8X8BBUL', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5INDC8M80', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Q8RKKOF29', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-B5ED3EFBC', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8IC5ZXAX1', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KCGM25D75', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1MO06IRID', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4QWGUGN6W', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5T9M5KEHT', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-9KG7W7NZF', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NYGN5R2CL', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-H61OXFCJ2', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-30WSQ4EFH', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-J36NG6MY2', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-TZU34ZA7A', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-LPWNYL74G', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DDJTXOS4Z', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HFOS4S185', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2MLGIFL1M', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CI5AHX0TC', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WSXVCH1WN', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8F0C3D06T', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3YZR0664F', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1L2VMWTM0', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KPMCB57O7', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-N6QXQDZV3', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-35FAYFWDP', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-TVZWE2JR8', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-0WK86IKLF', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8WBU6ESHJ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XGU6UNM01', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-86CXNEQZC', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NZ4LFCHE3', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ZKB6D72RF', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-BKXNG77NS', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3MJ332POD', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-SL9VEYNJ0', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-LY8KO43Z8', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8KGF1XR1L', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WT6FB54HW', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-7UQ9JMG5E', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KX3L2040U', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HL809F9WU', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-T9GU40ERH', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-I5O2NX3G9', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RVOUYU3NO', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2T3ETNUKS', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-SW0B93DZZ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-PQ55UF3K6', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VNRWWHHJB', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8Q26FZ7EP', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ZWD9FA90J', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-QSGMQX3RZ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-83NZ9MEAC', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Q9QQ4AL37', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-QBE9KD60Z', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NRNUXUO44', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8F0XKQ9P8', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-095JV1CJN', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WRRSIRUTZ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DTUD7IDQI', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ASCEAI9LD', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YOUM7BJZH', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-PEG2ZH9J3', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WAUW31F78', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-GIBZ6U7AQ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-63TD9LFBG', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-0MH1N9MJB', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YFP9RNQIK', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-SW4N5162D', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-53MWFB2MP', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-QWLUKBMIN', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JHS6QIX9G', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-6R04HZ5UD', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-OUJLT31VN', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-6BMKW933S', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-R4GG84E4Q', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-00XAP630X', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-AK97MC2A0', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NBS2GKGO5', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NVFEWK4S5', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MTV3WSYS1', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JS8RM4JGW', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-6NL4QR1XN', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4BUUQVSN6', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-56WEAAU3M', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WCA56PFTF', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-TK1QAZP0B', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-SYZ5ADFXP', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-S3VLOUW6G', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-V2K1V1JWJ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MZ6VHV5PQ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DRZDQDPN3', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-231VVRYYA', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-06G3MC88G', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WS52I2ZVD', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3QTNQD55U', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-EX7ALECU3', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DQN8TVQY6', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FJT54OQFI', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-BLTYUF7QR', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8DL2JXDSO', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KBAOXIJ4Y', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ZYL28R5UW', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NCRDA8LYB', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-BQYKXQLXU', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-PSHCS65MR', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-90Y1WFVYZ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4GG33NUPW', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5Y0A79GED', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RMZHTAD6J', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XBSOJ5I36', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-AAJW53VNE', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-9NYSPSEL6', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-94WMY337S', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-35Y3BJQFW', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-R7WY3TMRC', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RXAQVH0F3', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-EFQ2AVFSB', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XPOUJSAVD', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RSHPF5NTT', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Z9402336V', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-OI36C5WOJ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XEOGV1LVS', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-QIQDXG9NC', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-9IY242GGT', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-PQTEUT52E', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-18NKI3WPS', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-34U3QAA49', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-S05TYKBBF', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-B4J8WCWDD', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HR377WC28', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-PNRR7S1T2', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-UNR0N0KJ9', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NARQQANBE', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8PUL1MYUU', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KJPCT1FP3', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XGC80N0AM', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8Y1JN8DH3', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Y56M31T07', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NHYHXQSV1', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-V30RZVG7L', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CR6V2GSOU', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VSAF5O0LJ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4F4BF2LFH', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ERSMQHXNX', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Q0TFLZQWS', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ZXCS6SMHD', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JHXYAUGRA', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-IT2XYWES2', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-22UCD94OG', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VGDLQ3K35', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-O8AFL8RGX', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-9RBIZ8G9X', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-9JIU0SVBV', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-PWRBG0GWU', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ZME1MX12T', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-A7LWRKSJP', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5XISX0HD4', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5T6EXKD3Z', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-10ILV351B', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FDULBZDIY', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-SFVR6I980', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FKV8DCZGT', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ECRK3M3IZ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WMKSLOF39', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HGE60O6AL', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RGCWDJOT8', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DESWK5KVN', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RD593HJ92', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XWNCAO39B', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-AQ4XGDLX8', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2ZOVEA1PL', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JF4FUX83X', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CQ228GK3B', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XCTMU9I7U', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-M3F37T22W', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ASZUXM9M9', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CJVHX24WW', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KZT4T898V', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RI128R5TE', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HCAG6X9MJ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XOQENWBP7', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1LTQGM497', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ZLVZT4O1G', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FTIXTXCIA', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-O2YE6QNHY', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-0JPDDBF47', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-H1FP1IFJI', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FYBPBMY5B', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-F7BWDVC97', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MLENB1LQ4', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FT9YNU8UP', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5W21Q2O5L', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YM61QWPR3', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XXFQJJHZM', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-H52YKCM9X', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NT56ZNZ54', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DRJY7BMB5', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-P6886RPXX', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-PBXW2EY5K', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5VQCJTM36', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NMM3GGM1J', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1JQQ0CDSA', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-R89YI91K4', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-7L7L9MXOT', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-50Z7MXKZS', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-EGADRZTIB', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1DR7H46H6', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-O28QZL994', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-EYTRWVERM', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HAJZAUWJV', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-AGYO3AB89', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-V224329ZM', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4YTMSFWYK', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-QP40RL1N1', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CB1BVAMAH', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-9VGXUY02O', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-BCPVVKCZ3', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VDC3CWZX7', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-7HWLI21FA', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5QWIUJEFM', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4C3PBMAIZ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3QC7DM7PH', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-A6R5G3FWV', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3A6WDE12Z', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-0F2LZA9RU', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XGBJXMXRX', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5YOGR8PQ1', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-LPBFBUF3N', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-TUSPGR2AY', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-G05I8M2FQ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-H5NDXJIAQ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-X8FJL8WQZ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KIB2FQRUP', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VNV0OYWR7', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-GKBPM3PB2', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XVPI30KE7', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Y3PRMJP51', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KEPP5SBML', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-0PDUZ6QEQ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1GHWWFLLE', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-149S2TO8O', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1ZB6FSIGE', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VCRQTXDZL', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-645JVC3XL', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2HJ00JBSR', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-7FZDG2W65', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HD6ANE3LN', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2HS1B1J8V', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-IHOHYMDF5', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ZYZMAFEKF', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JO85WX5JE', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RURJDCURW', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FZC3BLXPJ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-B0YR2LOZ1', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-6ZFP58ZRK', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-UMZDLHQ78', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8A2IHDXY3', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-EDYEPWUMT', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-X3TM99R12', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DLV0UTQ72', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-SFU0ZYIM0', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YAJ6A66NH', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-X8CFU41AU', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CJ3Z4WP32', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-UJBLRQKXA', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-T78R8EBGH', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DDIH55GNZ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-F1SSD4NWF', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4BE55FKRD', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-BG2DFBL46', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MKVMNR7W4', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-C3Z4JUGU5', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NVP8EEEGQ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MQFWP2LU7', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-BH873JG6H', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3D76651SM', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KZ7V6KWMP', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CD8NEJFJN', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-PWXE9L30H', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1RT95F5LR', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Q7CEEROE5', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Q08APOAEG', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NNKREGLXE', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YQEG33MKX', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VRD9G7H5K', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-68R86GQ1G', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-BXZUKQL2M', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-QM13UD73C', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-I7OOGJLNS', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-GXDBO1IQJ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JJZPRFMWN', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DBTDFITGW', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VYHL6ASIJ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-F61NNU332', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-6Z9H72KMC', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WVOONDMA9', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CXTSTBXN3', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CSMZLC921', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3FTBSARZJ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ESHGKBXLY', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-E0YLXW5H4', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3QFI6UMWE', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-23VOWHO88', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-02Q9U5QCH', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3POMNSMB0', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NTT8OWUFQ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MT5XEHJWX', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ILDOY0PCQ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MN9HU4DGO', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1YOPU7GLL', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ZC5BM5MYB', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-UD3FXK3I9', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-LMDJOV52Y', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-N45X16BSL', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1CBY3Z7QC', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-F0D3AO9E6', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YQA8GUOD1', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2EE999233', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-D6GD5WT2Y', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-7DFBMLTMY', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-J6TJKC6VJ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2AWQ3ZRF4', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4KOVIF5W3', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3489SXI1U', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-F37VKUHVE', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-GHBMAVCE4', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-W64U46547', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1GUJV1MGQ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-M13IOZVI9', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-TX7EVZN1Z', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2PTS2ML8J', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VTG83RVX7', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2IOE6BR66', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-I68XZMR23', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Q940UN6MU', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8Y9NFR0N0', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MYP341DZ8', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WJ68VGKAZ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-819MSDR9H', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-27CGND4VG', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YYDOD47BF', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YU7F6J8G5', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HMY16WTCA', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FPWEBRLG2', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FGE79X0DE', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-551LMZ84R', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-UWMBDCTX4', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-BNHQXW9HY', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WB0P5LCN6', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Z4P9E1SVG', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-UVW2G9IRN', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-OO93WVLB0', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NTRIK82TG', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8TXV42S74', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Z74LKL50G', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-QQEXNIPTR', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WGK2VD34L', false, 1)
|
||||||
|
ON CONFLICT DO NOTHING;
|
||||||
81
backend/migrations/095_proxy_auto_retry.sql
Normal file
81
backend/migrations/095_proxy_auto_retry.sql
Normal file
@@ -0,0 +1,81 @@
|
|||||||
|
-- Migration: Auto-retry failed proxies after cooldown period
-- Proxies that fail will be retried after a configurable interval

-- Add last_failed_at column to track when proxy last failed
-- NOTE(review): plain TIMESTAMP (no time zone) while later migrations in this
-- series use TIMESTAMPTZ — confirm whether this column should match.
ALTER TABLE proxies ADD COLUMN IF NOT EXISTS last_failed_at TIMESTAMP;

-- Add retry settings (seed-once: ON CONFLICT keeps any operator-edited values)
INSERT INTO settings (key, value, description)
VALUES
('proxy_retry_interval_hours', '4', 'Hours to wait before retrying a failed proxy'),
('proxy_max_failures_before_permanent', '10', 'Max failures before proxy is permanently disabled')
ON CONFLICT (key) DO NOTHING;
|
||||||
|
|
||||||
|
-- Create function to get eligible proxies (active OR failed but past retry interval).
-- Returns proxy ids ordered: active proxies first, then fewest failures, random tiebreak.
CREATE OR REPLACE FUNCTION get_eligible_proxy_ids()
RETURNS TABLE(proxy_id INT) AS $$
DECLARE
    retry_hours INT;
    max_failures INT;
BEGIN
    -- Read settings via scalar subqueries: if the settings row is absent,
    -- SELECT ... INTO would leave the variable NULL (COALESCE inside the
    -- query only guards a NULL value, not a missing row), and a NULL
    -- retry_hours would make every failed proxy permanently ineligible.
    retry_hours := COALESCE(
        (SELECT value::int FROM settings WHERE key = 'proxy_retry_interval_hours'), 4);

    -- Use the configured failure cap — the same setting auto_reenable_proxies
    -- reads — instead of a hard-coded 10, so the two functions stay in sync.
    max_failures := COALESCE(
        (SELECT value::int FROM settings WHERE key = 'proxy_max_failures_before_permanent'), 10);

    RETURN QUERY
    SELECT p.id
    FROM proxies p
    WHERE p.active = true
       OR (
            p.active = false
            AND p.last_failed_at IS NOT NULL
            AND p.last_failed_at < NOW() - (retry_hours || ' hours')::interval
            AND p.failure_count < max_failures  -- Don't retry if too many failures
       )
    ORDER BY
        p.active DESC,        -- Prefer active proxies
        p.failure_count ASC,  -- Then prefer proxies with fewer failures
        RANDOM();
END;
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
-- Scheduled-job helper: periodically re-enable proxies past their retry window.
-- Intended to run about once an hour; marks a proxy active again once its
-- cooldown has elapsed and it hasn't exceeded the permanent-failure cap.
-- Returns the number of proxies re-enabled.
CREATE OR REPLACE FUNCTION auto_reenable_proxies()
RETURNS INT AS $$
DECLARE
    retry_hours INT;
    max_failures INT;
    reenabled_count INT;
BEGIN
    -- Read settings via scalar subqueries: with SELECT ... INTO, a missing
    -- settings row leaves the variable NULL, the interval below becomes NULL,
    -- and the UPDATE silently matches nothing. This form falls back to the
    -- documented defaults (4 hours / 10 failures) instead.
    retry_hours := COALESCE(
        (SELECT value::int FROM settings WHERE key = 'proxy_retry_interval_hours'), 4);
    max_failures := COALESCE(
        (SELECT value::int FROM settings WHERE key = 'proxy_max_failures_before_permanent'), 10);

    -- Re-enable proxies that have cooled down
    UPDATE proxies
    SET active = true,
        updated_at = NOW()
    WHERE active = false
      AND last_failed_at IS NOT NULL
      AND last_failed_at < NOW() - (retry_hours || ' hours')::interval
      AND failure_count < max_failures;

    GET DIAGNOSTICS reenabled_count = ROW_COUNT;

    IF reenabled_count > 0 THEN
        RAISE NOTICE 'Auto-reenabled % proxies after % hour cooldown', reenabled_count, retry_hours;
    END IF;

    RETURN reenabled_count;
END;
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
-- Composite index backing the eligibility/re-enable predicates above
-- (active flag, cooldown timestamp, failure cap).
CREATE INDEX IF NOT EXISTS idx_proxies_retry
    ON proxies(active, last_failed_at, failure_count);

COMMENT ON COLUMN proxies.last_failed_at IS 'Timestamp of last failure - used for auto-retry logic';
COMMENT ON FUNCTION auto_reenable_proxies() IS 'Call periodically to re-enable failed proxies that have cooled down';
|
||||||
20
backend/migrations/096_product_search_indexes.sql
Normal file
20
backend/migrations/096_product_search_indexes.sql
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
-- Migration: Add trigram indexes for fast ILIKE product searches
-- Enables fast searches on name_raw, brand_name_raw, and description
--
-- NOTE(review): CREATE INDEX CONCURRENTLY cannot execute inside a transaction
-- block. If the migration runner wraps each file in a transaction, this file
-- must be run with autocommit — verify how migrations are applied.

-- Enable pg_trgm extension if not already enabled
CREATE EXTENSION IF NOT EXISTS pg_trgm;

-- Create GIN trigram indexes for fast ILIKE searches
-- (gin_trgm_ops lets ILIKE '%term%' use the index even with a leading wildcard)
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_store_products_name_trgm
ON store_products USING gin (name_raw gin_trgm_ops);

CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_store_products_brand_name_trgm
ON store_products USING gin (brand_name_raw gin_trgm_ops);

CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_store_products_description_trgm
ON store_products USING gin (description gin_trgm_ops);

-- Add comment
COMMENT ON INDEX idx_store_products_name_trgm IS 'Trigram index for fast ILIKE searches on product name';
COMMENT ON INDEX idx_store_products_brand_name_trgm IS 'Trigram index for fast ILIKE searches on brand name';
COMMENT ON INDEX idx_store_products_description_trgm IS 'Trigram index for fast ILIKE searches on description';
|
||||||
11
backend/migrations/097_worker_tasks_worker_id_index.sql
Normal file
11
backend/migrations/097_worker_tasks_worker_id_index.sql
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
-- Migration: Add indexes for dashboard performance
-- Speeds up the tasks listing query with ORDER BY and JOIN
--
-- NOTE(review): CREATE INDEX CONCURRENTLY cannot run inside a transaction
-- block — verify the migration runner executes this file with autocommit.

-- Partial index for JOIN with worker_registry (unassigned tasks are excluded)
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_worker_tasks_worker_id
ON worker_tasks(worker_id)
WHERE worker_id IS NOT NULL;

-- Index for ORDER BY created_at DESC (dashboard listing)
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_worker_tasks_created_at_desc
ON worker_tasks(created_at DESC);
|
||||||
13
backend/migrations/098_dispensary_stage_tracking.sql
Normal file
13
backend/migrations/098_dispensary_stage_tracking.sql
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
-- Migration: Add stage tracking columns to dispensaries table
-- Required for stage checkpoint feature in task handlers

-- One ALTER adds all three checkpoint columns in a single catalog pass.
ALTER TABLE dispensaries
    ADD COLUMN IF NOT EXISTS consecutive_successes INTEGER DEFAULT 0,
    ADD COLUMN IF NOT EXISTS consecutive_failures INTEGER DEFAULT 0,
    ADD COLUMN IF NOT EXISTS last_successful_crawl_at TIMESTAMPTZ;

-- Partial indexes for finding stores by status; rows still at the
-- default 0 are skipped, keeping both indexes small.
CREATE INDEX IF NOT EXISTS idx_dispensaries_consecutive_successes
    ON dispensaries(consecutive_successes) WHERE consecutive_successes > 0;

CREATE INDEX IF NOT EXISTS idx_dispensaries_consecutive_failures
    ON dispensaries(consecutive_failures) WHERE consecutive_failures > 0;
|
||||||
68
backend/migrations/099_working_hours.sql
Normal file
68
backend/migrations/099_working_hours.sql
Normal file
@@ -0,0 +1,68 @@
|
|||||||
|
-- Migration: 099_working_hours.sql
-- Description: Working hours profiles for natural traffic pattern simulation
-- Created: 2024-12-13

-- Working hours table: defines hourly activity weights to mimic natural traffic
CREATE TABLE IF NOT EXISTS working_hours (
    id SERIAL PRIMARY KEY,
    name VARCHAR(50) UNIQUE NOT NULL,
    description TEXT,

    -- Hour weights: {"0": 15, "1": 5, ..., "18": 100, ...}
    -- Value = percent chance to trigger activity that hour (0-100)
    hour_weights JSONB NOT NULL,

    -- Day-of-week multipliers (0=Sunday, 6=Saturday)
    -- Optional adjustment for weekend vs weekday patterns
    dow_weights JSONB DEFAULT '{"0": 90, "1": 100, "2": 100, "3": 100, "4": 100, "5": 110, "6": 95}',

    timezone VARCHAR(50) DEFAULT 'America/Phoenix',
    enabled BOOLEAN DEFAULT true,
    created_at TIMESTAMPTZ DEFAULT NOW(),
    updated_at TIMESTAMPTZ DEFAULT NOW()
);

-- Seed: Natural traffic pattern based on internet usage research
-- Optimized for cannabis dispensary browsing (lunch + after-work peaks)
INSERT INTO working_hours (name, description, timezone, hour_weights) VALUES (
    'natural_traffic',
    'Mimics natural user browsing patterns - peaks at lunch and 5-7 PM',
    'America/Phoenix',
    '{
        "0": 15,
        "1": 5,
        "2": 5,
        "3": 5,
        "4": 5,
        "5": 10,
        "6": 20,
        "7": 30,
        "8": 35,
        "9": 45,
        "10": 50,
        "11": 60,
        "12": 75,
        "13": 65,
        "14": 60,
        "15": 70,
        "16": 80,
        "17": 95,
        "18": 100,
        "19": 100,
        "20": 90,
        "21": 70,
        "22": 45,
        "23": 25
    }'::jsonb
) ON CONFLICT (name) DO UPDATE SET
    hour_weights = EXCLUDED.hour_weights,
    description = EXCLUDED.description,
    updated_at = NOW();

-- Index for quick lookups by enabled flag.
-- No separate index on name: the UNIQUE constraint above already creates a
-- unique index on working_hours(name), so idx_working_hours_name would be
-- a redundant duplicate.
CREATE INDEX IF NOT EXISTS idx_working_hours_enabled ON working_hours(enabled);

COMMENT ON TABLE working_hours IS 'Activity profiles for natural traffic simulation. Hour weights are percent chance (0-100) to trigger activity.';
COMMENT ON COLUMN working_hours.hour_weights IS 'JSON object mapping hour (0-23) to percent chance (0-100). 100 = always run, 0 = never run.';
COMMENT ON COLUMN working_hours.dow_weights IS 'Optional day-of-week multipliers. 0=Sunday. Applied as (hour_weight * dow_weight / 100).';
|
||||||
19
backend/migrations/100_worker_timezone.sql
Normal file
19
backend/migrations/100_worker_timezone.sql
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
-- Migration: 100_worker_timezone.sql
-- Description: Add timezone column to worker_registry for working hours support
-- Created: 2024-12-13

-- timezone: populated from preflight IP geolocation (e.g., 'America/New_York').
-- working_hours_id: links the worker to a working_hours profile;
-- NULL means the default 'natural_traffic' profile is used.
ALTER TABLE worker_registry
    ADD COLUMN IF NOT EXISTS timezone VARCHAR(50),
    ADD COLUMN IF NOT EXISTS working_hours_id INTEGER REFERENCES working_hours(id);

-- Index for workers by timezone (useful for capacity planning)
CREATE INDEX IF NOT EXISTS idx_worker_registry_timezone ON worker_registry(timezone);

COMMENT ON COLUMN worker_registry.timezone IS 'IANA timezone from preflight IP geolocation (e.g., America/New_York)';
COMMENT ON COLUMN worker_registry.working_hours_id IS 'Reference to working_hours profile. NULL uses default natural_traffic.';
|
||||||
78
backend/migrations/101_worker_preflight_timezone.sql
Normal file
78
backend/migrations/101_worker_preflight_timezone.sql
Normal file
@@ -0,0 +1,78 @@
|
|||||||
|
-- Migration: 101_worker_preflight_timezone.sql
-- Description: Update update_worker_preflight to extract timezone from fingerprint
-- Created: 2024-12-13

-- Records the result of one preflight probe (curl or http transport) for a
-- worker, then recomputes the worker's overall preflight status.
-- For the http transport the fingerprint JSON is persisted and its
-- 'detectedTimezone' key (when present) is copied into worker_registry.timezone
-- so working-hours scheduling can use the worker's local time.
CREATE OR REPLACE FUNCTION public.update_worker_preflight(
    p_worker_id character varying,       -- worker_registry.worker_id to update
    p_transport character varying,       -- 'curl' or 'http'; any other value only recomputes overall status
    p_status character varying,          -- probe result for this transport (e.g. 'passed' / 'failed')
    p_ip character varying DEFAULT NULL,         -- IP observed during the probe
    p_response_ms integer DEFAULT NULL,          -- probe latency in milliseconds
    p_error text DEFAULT NULL,                   -- probe error message, if any
    p_fingerprint jsonb DEFAULT NULL             -- browser fingerprint (http transport only)
)
RETURNS void
LANGUAGE plpgsql
AS $function$
DECLARE
    v_curl_status VARCHAR(20);
    v_http_status VARCHAR(20);
    v_overall_status VARCHAR(20);
    v_timezone VARCHAR(50);
BEGIN
    IF p_transport = 'curl' THEN
        UPDATE worker_registry
        SET
            preflight_curl_status = p_status,
            preflight_curl_at = NOW(),
            preflight_curl_ms = p_response_ms,
            preflight_curl_error = p_error,
            curl_ip = p_ip,
            updated_at = NOW()
        WHERE worker_id = p_worker_id;

    ELSIF p_transport = 'http' THEN
        -- Extract timezone from fingerprint JSON if present
        -- (->> yields NULL when p_fingerprint is NULL or lacks the key)
        v_timezone := p_fingerprint->>'detectedTimezone';

        UPDATE worker_registry
        SET
            preflight_http_status = p_status,
            preflight_http_at = NOW(),
            preflight_http_ms = p_response_ms,
            preflight_http_error = p_error,
            http_ip = p_ip,
            -- keep the previous fingerprint when none was supplied this time
            fingerprint_data = COALESCE(p_fingerprint, fingerprint_data),
            -- Save extracted timezone (retain the old value if none detected)
            timezone = COALESCE(v_timezone, timezone),
            updated_at = NOW()
        WHERE worker_id = p_worker_id;
    END IF;

    -- Update overall preflight status from the two per-transport statuses
    SELECT preflight_curl_status, preflight_http_status
    INTO v_curl_status, v_http_status
    FROM worker_registry
    WHERE worker_id = p_worker_id;

    -- Compute overall status:
    --   both passed -> 'passed'; exactly one passed -> 'partial';
    --   any failed and none passed -> 'failed'; otherwise 'pending'
    IF v_curl_status = 'passed' AND v_http_status = 'passed' THEN
        v_overall_status := 'passed';
    ELSIF v_curl_status = 'passed' OR v_http_status = 'passed' THEN
        v_overall_status := 'partial';
    ELSIF v_curl_status = 'failed' OR v_http_status = 'failed' THEN
        v_overall_status := 'failed';
    ELSE
        v_overall_status := 'pending';
    END IF;

    UPDATE worker_registry
    SET
        preflight_status = v_overall_status,
        preflight_at = NOW()
    WHERE worker_id = p_worker_id;
END;
$function$;

COMMENT ON FUNCTION update_worker_preflight(varchar, varchar, varchar, varchar, integer, text, jsonb)
IS 'Updates worker preflight status and extracts timezone from fingerprint for working hours';
|
||||||
114
backend/migrations/102_check_working_hours.sql
Normal file
114
backend/migrations/102_check_working_hours.sql
Normal file
@@ -0,0 +1,114 @@
|
|||||||
|
-- Migration: 102_check_working_hours.sql
-- Description: Function to check if worker should be available based on working hours
-- Created: 2024-12-13

-- Function to check if a worker should be available for work.
-- Looks up the worker's timezone, converts "now" into that timezone, combines
-- the profile's hour weight with the day-of-week multiplier, and rolls a 0-99
-- die against the resulting percentage.
-- Returns TRUE if worker passes the probability check for current hour
-- Returns FALSE if worker should sleep/skip this cycle
CREATE OR REPLACE FUNCTION check_working_hours(
    p_worker_id VARCHAR,
    p_profile_name VARCHAR DEFAULT 'natural_traffic'
)
RETURNS TABLE (
    is_available BOOLEAN,       -- final verdict for this cycle
    current_hour INTEGER,       -- hour-of-day in the worker's timezone
    hour_weight INTEGER,        -- effective weight after the dow multiplier
    worker_timezone VARCHAR,    -- timezone actually used for the check
    roll INTEGER,               -- the random 0-99 roll
    reason TEXT                 -- human-readable explanation of the verdict
)
LANGUAGE plpgsql
AS $function$
DECLARE
    v_timezone VARCHAR(50);
    v_hour INTEGER;
    v_weight INTEGER;
    v_dow INTEGER;
    v_dow_weight INTEGER;
    v_final_weight INTEGER;
    v_roll INTEGER;
    v_hour_weights JSONB;
    v_dow_weights JSONB;
    v_profile_enabled BOOLEAN;
BEGIN
    -- Get worker's timezone (from preflight)
    SELECT wr.timezone INTO v_timezone
    FROM worker_registry wr
    WHERE wr.worker_id = p_worker_id;

    -- Default to America/Phoenix if no timezone set
    -- (also covers an unknown worker_id, which leaves v_timezone NULL)
    v_timezone := COALESCE(v_timezone, 'America/Phoenix');

    -- Get current hour in worker's timezone
    v_hour := EXTRACT(HOUR FROM NOW() AT TIME ZONE v_timezone)::INTEGER;

    -- Get day of week (0=Sunday)
    v_dow := EXTRACT(DOW FROM NOW() AT TIME ZONE v_timezone)::INTEGER;

    -- Get working hours profile
    SELECT wh.hour_weights, wh.dow_weights, wh.enabled
    INTO v_hour_weights, v_dow_weights, v_profile_enabled
    FROM working_hours wh
    WHERE wh.name = p_profile_name AND wh.enabled = true;

    -- If profile not found or disabled, always available (fail open)
    IF v_hour_weights IS NULL THEN
        RETURN QUERY SELECT
            TRUE::BOOLEAN,
            v_hour,
            100::INTEGER,
            v_timezone,
            0::INTEGER,
            'Profile not found or disabled - defaulting to available'::TEXT;
        RETURN;
    END IF;

    -- Get hour weight (default to 50 if hour not specified)
    v_weight := COALESCE((v_hour_weights->>v_hour::TEXT)::INTEGER, 50);

    -- Get day-of-week weight (default to 100)
    v_dow_weight := COALESCE((v_dow_weights->>v_dow::TEXT)::INTEGER, 100);

    -- Calculate final weight (hour_weight * dow_weight / 100)
    -- Integer division: the result is truncated toward zero.
    v_final_weight := (v_weight * v_dow_weight / 100);

    -- Roll the dice (0-99); roll < weight means "available"
    v_roll := floor(random() * 100)::INTEGER;

    -- Return result
    RETURN QUERY SELECT
        (v_roll < v_final_weight)::BOOLEAN AS is_available,
        v_hour AS current_hour,
        v_final_weight AS hour_weight,
        v_timezone AS worker_timezone,
        v_roll AS roll,
        CASE
            WHEN v_roll < v_final_weight THEN
                format('Available: rolled %s < %s%% threshold', v_roll, v_final_weight)
            ELSE
                format('Sleeping: rolled %s >= %s%% threshold', v_roll, v_final_weight)
        END AS reason;
END;
$function$;
|
||||||
|
|
||||||
|
-- Thin wrapper around check_working_hours that exposes only the boolean
-- verdict. Falls back to TRUE (available) when the detailed check yields
-- no row.
CREATE OR REPLACE FUNCTION is_worker_available(
    p_worker_id VARCHAR,
    p_profile_name VARCHAR DEFAULT 'natural_traffic'
)
RETURNS BOOLEAN
LANGUAGE plpgsql
AS $function$
BEGIN
    RETURN COALESCE(
        (SELECT cw.is_available
         FROM check_working_hours(p_worker_id, p_profile_name) cw),
        TRUE);
END;
$function$;

COMMENT ON FUNCTION check_working_hours(VARCHAR, VARCHAR) IS
'Check if worker should be available based on working hours profile. Returns detailed info.';
COMMENT ON FUNCTION is_worker_available(VARCHAR, VARCHAR) IS
'Simple boolean check if worker passes working hours probability roll.';
|
||||||
12
backend/migrations/103_schedule_dispensary_id.sql
Normal file
12
backend/migrations/103_schedule_dispensary_id.sql
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
-- Migration: 103_schedule_dispensary_id.sql
-- Description: Add dispensary_id to task_schedules for per-store schedules
-- Created: 2025-12-13

-- Single-store schedules reference exactly one dispensary; a NULL keeps the
-- legacy behaviour of refreshing every store matching state_code.
ALTER TABLE task_schedules
    ADD COLUMN IF NOT EXISTS dispensary_id INTEGER REFERENCES dispensaries(id);

-- Fast lookup of schedules for a given store.
CREATE INDEX IF NOT EXISTS idx_task_schedules_dispensary_id
    ON task_schedules(dispensary_id);

COMMENT ON COLUMN task_schedules.dispensary_id IS 'For single-store schedules. If set, only this store is refreshed. If NULL, uses state_code for all stores in state.';
|
||||||
25
backend/migrations/104_task_source_tracking.sql
Normal file
25
backend/migrations/104_task_source_tracking.sql
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
-- Migration 104: Add source tracking to worker_tasks
-- Purpose: Track WHERE tasks are created from (schedule vs API endpoint)
--
-- All automated task creation should be visible in task_schedules.
-- This column helps identify "phantom" tasks created outside the schedule system.

-- One ALTER adds all three tracking columns:
--   source             - which code path created the task
--   source_schedule_id - originating schedule, when applicable
--   source_metadata    - request metadata (IP, user agent) for debugging
ALTER TABLE worker_tasks
    ADD COLUMN IF NOT EXISTS source VARCHAR(100),
    ADD COLUMN IF NOT EXISTS source_schedule_id INTEGER REFERENCES task_schedules(id),
    ADD COLUMN IF NOT EXISTS source_metadata JSONB;

-- Create index for querying by source
CREATE INDEX IF NOT EXISTS idx_worker_tasks_source ON worker_tasks(source);

-- Comment explaining source values
COMMENT ON COLUMN worker_tasks.source IS 'Task creation source: schedule, api_run_now, api_crawl_state, api_batch_staggered, api_batch_az_stores, task_chain, manual';
COMMENT ON COLUMN worker_tasks.source_schedule_id IS 'ID of the schedule that created this task (if source=schedule or source=api_run_now)';
COMMENT ON COLUMN worker_tasks.source_metadata IS 'Request metadata: {ip, user_agent, endpoint, timestamp}';
|
||||||
25
backend/migrations/105_dashboard_performance_indexes.sql
Normal file
25
backend/migrations/105_dashboard_performance_indexes.sql
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
-- Migration 105: Add indexes for dashboard performance
-- Purpose: Speed up the /dashboard and /national/summary endpoints
--
-- These queries were identified as slow:
-- 1. COUNT(*) FROM store_product_snapshots WHERE captured_at >= NOW() - INTERVAL '24 hours'
-- 2. National summary aggregate queries
--
-- NOTE(review): CREATE INDEX CONCURRENTLY cannot run inside a transaction
-- block — verify the migration runner executes this file with autocommit.

-- Index for snapshot counts by time (used in dashboard)
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_store_product_snapshots_captured_at
ON store_product_snapshots(captured_at DESC);

-- Index for crawl traces by time and success (used in dashboard)
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_crawl_traces_started_success
ON crawl_orchestration_traces(started_at DESC, success);

-- Partial index for recent failed crawls (faster for dashboard alerts)
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_crawl_traces_recent_failures
ON crawl_orchestration_traces(started_at DESC)
WHERE success = false;

-- Composite index for store_products aggregations by dispensary
-- Helps with national summary state metrics query
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_store_products_dispensary_brand
ON store_products(dispensary_id, brand_name_raw)
WHERE brand_name_raw IS NOT NULL;
|
||||||
10
backend/migrations/106_rename_store_discovery_schedule.sql
Normal file
10
backend/migrations/106_rename_store_discovery_schedule.sql
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
-- Migration: 106_rename_store_discovery_schedule.sql
-- Description: Rename store_discovery_dutchie to 'Store Discovery'
-- Created: 2025-12-13

-- Update the schedule name for better display
-- The platform='dutchie' field is preserved for badge display in UI
-- Targets rows by the old machine-readable name, so re-running the
-- migration after the rename is a harmless no-op.
UPDATE task_schedules
SET name = 'Store Discovery',
    updated_at = NOW()
WHERE name = 'store_discovery_dutchie';
|
||||||
23
backend/migrations/107_proxy_tracking.sql
Normal file
23
backend/migrations/107_proxy_tracking.sql
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
-- Migration: 107_proxy_tracking.sql
-- Description: Add proxy tracking columns to worker_tasks for geo-targeting visibility
-- Created: 2025-12-13

-- One ALTER adds all three proxy-visibility columns
-- (45 chars fits a full IPv6 textual address).
ALTER TABLE worker_tasks
    ADD COLUMN IF NOT EXISTS proxy_ip VARCHAR(45),
    ADD COLUMN IF NOT EXISTS proxy_geo VARCHAR(100),
    ADD COLUMN IF NOT EXISTS proxy_source VARCHAR(10);

-- Comments
COMMENT ON COLUMN worker_tasks.proxy_ip IS 'IP address of proxy used for this task';
COMMENT ON COLUMN worker_tasks.proxy_geo IS 'Geo target used (e.g., "arizona", "phoenix, arizona")';
COMMENT ON COLUMN worker_tasks.proxy_source IS 'Source of proxy: "api" (Evomi dynamic) or "static" (fallback table)';

-- Partial index for proxy analysis; tasks with no proxy recorded are skipped.
CREATE INDEX IF NOT EXISTS idx_worker_tasks_proxy_ip
    ON worker_tasks(proxy_ip)
    WHERE proxy_ip IS NOT NULL;
|
||||||
231
backend/migrations/108_worker_geo_sessions.sql
Normal file
231
backend/migrations/108_worker_geo_sessions.sql
Normal file
@@ -0,0 +1,231 @@
|
|||||||
|
-- Migration: 108_worker_geo_sessions.sql
-- Description: Add geo session tracking to worker_registry for state-based task assignment
-- Created: 2025-12-13

-- Geo session columns: a worker qualifies with a geo (state/city), then only
-- claims tasks matching that geo. A single ALTER adds all six columns.
ALTER TABLE worker_registry
    ADD COLUMN IF NOT EXISTS current_state VARCHAR(2),
    ADD COLUMN IF NOT EXISTS current_city VARCHAR(100),
    ADD COLUMN IF NOT EXISTS geo_session_started_at TIMESTAMPTZ,
    ADD COLUMN IF NOT EXISTS session_task_count INT DEFAULT 0,
    ADD COLUMN IF NOT EXISTS session_max_tasks INT DEFAULT 7,
    ADD COLUMN IF NOT EXISTS proxy_geo VARCHAR(100);

-- Column documentation
COMMENT ON COLUMN worker_registry.current_state IS 'Worker''s current geo assignment (US state code, e.g., AZ)';
COMMENT ON COLUMN worker_registry.current_city IS 'Worker''s current city assignment (optional, e.g., phoenix)';
COMMENT ON COLUMN worker_registry.geo_session_started_at IS 'When worker''s current geo session started';
COMMENT ON COLUMN worker_registry.session_task_count IS 'Number of tasks completed in current geo session';
COMMENT ON COLUMN worker_registry.session_max_tasks IS 'Max tasks per geo session before re-qualification (default 7)';
COMMENT ON COLUMN worker_registry.proxy_geo IS 'Geo target string used for proxy (e.g., "arizona" or "phoenix, arizona")';

-- Partial index: only workers with an assigned state are looked up this way.
CREATE INDEX IF NOT EXISTS idx_worker_registry_current_state
    ON worker_registry(current_state)
    WHERE current_state IS NOT NULL;
|
||||||
|
|
||||||
|
-- ============================================================
-- UPDATED claim_task FUNCTION
-- Now filters by worker's geo session state
-- ============================================================
-- Atomically claims one pending task matching the worker's role, preflight
-- capabilities, and current geo session state. Returns the claimed row;
-- returns NULL when the worker has no valid session or no matching task.
CREATE OR REPLACE FUNCTION claim_task(
    p_role VARCHAR(50),
    p_worker_id VARCHAR(100),
    p_curl_passed BOOLEAN DEFAULT TRUE,
    p_http_passed BOOLEAN DEFAULT FALSE
) RETURNS worker_tasks AS $$
DECLARE
    claimed_task worker_tasks;
    worker_state VARCHAR(2);
    session_valid BOOLEAN;
    session_tasks INT;
    max_tasks INT;
BEGIN
    -- Get worker's current geo session info
    -- (a session is valid for 60 minutes from geo_session_started_at)
    SELECT
        current_state,
        session_task_count,
        session_max_tasks,
        (geo_session_started_at IS NOT NULL AND geo_session_started_at > NOW() - INTERVAL '60 minutes')
    INTO worker_state, session_tasks, max_tasks, session_valid
    FROM worker_registry
    WHERE worker_id = p_worker_id;

    -- If no valid geo session, or session exhausted, worker can't claim tasks
    -- Worker must re-qualify first (see assign_worker_geo)
    IF worker_state IS NULL OR NOT session_valid OR session_tasks >= COALESCE(max_tasks, 7) THEN
        RETURN NULL;
    END IF;

    -- Claim task matching worker's state.
    -- FOR UPDATE SKIP LOCKED in the subquery makes concurrent claims
    -- race-free: two workers can never pick the same row.
    UPDATE worker_tasks
    SET
        status = 'claimed',
        worker_id = p_worker_id,
        claimed_at = NOW(),
        updated_at = NOW()
    WHERE id = (
        -- NOTE(review): the INNER JOIN to dispensaries excludes tasks with a
        -- NULL dispensary_id, which makes the `wt.dispensary_id IS NULL`
        -- branch further down unreachable — confirm whether geo-less tasks
        -- are ever meant to be claimable through this function.
        SELECT wt.id FROM worker_tasks wt
        JOIN dispensaries d ON wt.dispensary_id = d.id
        WHERE wt.role = p_role
        AND wt.status = 'pending'
        AND (wt.scheduled_for IS NULL OR wt.scheduled_for <= NOW())
        -- GEO FILTER: Task's dispensary must match worker's state
        AND d.state = worker_state
        -- Method compatibility: worker must have passed the required preflight
        AND (
            wt.method IS NULL -- No preference, any worker can claim
            OR (wt.method = 'curl' AND p_curl_passed = TRUE)
            OR (wt.method = 'http' AND p_http_passed = TRUE)
        )
        -- Exclude stores that already have an active task
        -- (the dispensary_id IS NOT NULL guard avoids the NOT IN + NULL trap)
        AND (wt.dispensary_id IS NULL OR wt.dispensary_id NOT IN (
            SELECT dispensary_id FROM worker_tasks
            WHERE status IN ('claimed', 'running')
            AND dispensary_id IS NOT NULL
        ))
        ORDER BY wt.priority DESC, wt.created_at ASC
        LIMIT 1
        FOR UPDATE SKIP LOCKED
    )
    RETURNING * INTO claimed_task;

    -- If task claimed, increment session task count
    -- Note: Use claimed_task.id IS NOT NULL (not claimed_task IS NOT NULL)
    -- PostgreSQL composite type NULL check quirk
    IF claimed_task.id IS NOT NULL THEN
        UPDATE worker_registry
        SET session_task_count = session_task_count + 1
        WHERE worker_id = p_worker_id;
    END IF;

    RETURN claimed_task;
END;
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
-- ============================================================
-- FUNCTION: assign_worker_geo
-- Assigns a geo session to a worker based on demand
-- Returns the assigned state, or NULL if no tasks available
-- ============================================================
CREATE OR REPLACE FUNCTION assign_worker_geo(
    p_worker_id VARCHAR(100)
) RETURNS VARCHAR(2) AS $$
DECLARE
    assigned_state VARCHAR(2);
BEGIN
    -- Find state with highest demand (pending tasks) and lowest coverage (workers)
    -- The LEFT JOIN counts workers already holding a live (<60 min) session
    -- in each state; states with no such workers still appear (count 0).
    SELECT d.state INTO assigned_state
    FROM dispensaries d
    JOIN worker_tasks wt ON wt.dispensary_id = d.id
    LEFT JOIN worker_registry wr ON wr.current_state = d.state
        AND wr.status = 'active'
        AND wr.geo_session_started_at > NOW() - INTERVAL '60 minutes'
    WHERE wt.status = 'pending'
    AND d.platform_dispensary_id IS NOT NULL
    GROUP BY d.state
    ORDER BY
        COUNT(wt.id) DESC, -- Most pending tasks first
        COUNT(DISTINCT wr.worker_id) ASC -- Fewest workers second
    LIMIT 1;

    -- If no pending tasks anywhere, return NULL
    IF assigned_state IS NULL THEN
        RETURN NULL;
    END IF;

    -- Assign the state to this worker; starting a new session resets the
    -- per-session task counter.
    UPDATE worker_registry
    SET
        current_state = assigned_state,
        current_city = NULL, -- City assigned later if available
        geo_session_started_at = NOW(),
        session_task_count = 0
    WHERE worker_id = p_worker_id;

    RETURN assigned_state;
END;
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
-- ============================================================
-- FUNCTION: check_worker_geo_session
-- Returns info about worker's current geo session
-- ============================================================
-- Yields zero rows for an unknown worker_id; otherwise one row describing
-- the session: validity (started within the last 60 minutes), tasks left
-- before re-qualification, and minutes left in the session window.
CREATE OR REPLACE FUNCTION check_worker_geo_session(
    p_worker_id VARCHAR(100)
) RETURNS TABLE (
    current_state VARCHAR(2),
    current_city VARCHAR(100),
    session_valid BOOLEAN,
    session_tasks_remaining INT,
    session_minutes_remaining INT
) AS $$
BEGIN
    -- All column references are wr.-qualified: the OUT columns
    -- current_state/current_city share names with the table columns and
    -- would otherwise be ambiguous inside plpgsql.
    RETURN QUERY
    SELECT
        wr.current_state,
        wr.current_city,
        (wr.geo_session_started_at IS NOT NULL AND wr.geo_session_started_at > NOW() - INTERVAL '60 minutes') as session_valid,
        GREATEST(0, wr.session_max_tasks - wr.session_task_count) as session_tasks_remaining,
        GREATEST(0, EXTRACT(EPOCH FROM (wr.geo_session_started_at + INTERVAL '60 minutes' - NOW())) / 60)::INT as session_minutes_remaining
    FROM worker_registry wr
    WHERE wr.worker_id = p_worker_id;
END;
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
-- View for worker thinness per state
-- Derives states from dispensaries table - no external states table dependency
-- One row per active state with pending-task demand, worker coverage, and a
-- status flag: 'no_coverage' (demand but zero capacity), 'thin' (capacity
-- below demand), or 'ok'.
CREATE OR REPLACE VIEW worker_state_capacity AS
WITH active_states AS (
    -- Get unique states from dispensaries with valid platform IDs
    SELECT DISTINCT state as code
    FROM dispensaries
    WHERE state IS NOT NULL
    AND platform_dispensary_id IS NOT NULL
),
pending_by_state AS (
    -- Pending task demand per state (tasks without a dispensary are excluded
    -- by the inner join)
    SELECT d.state, COUNT(*) as count
    FROM worker_tasks t
    JOIN dispensaries d ON t.dispensary_id = d.id
    WHERE t.status = 'pending'
    AND d.state IS NOT NULL
    GROUP BY d.state
),
workers_by_state AS (
    -- Worker coverage per state: only workers with a passed http preflight
    -- and a live (<60 min) geo session count toward capacity
    SELECT
        current_state,
        COUNT(*) as count,
        SUM(GREATEST(0, session_max_tasks - session_task_count)) as remaining_capacity
    FROM worker_registry
    WHERE status IN ('active', 'idle') -- Include both active and idle workers
    AND preflight_http_status = 'passed'
    AND current_state IS NOT NULL
    AND geo_session_started_at > NOW() - INTERVAL '60 minutes'
    GROUP BY current_state
)
SELECT
    s.code as state,
    s.code as state_name, -- Use code as name since we don't have a states lookup table
    COALESCE(p.count, 0) as pending_tasks,
    COALESCE(w.count, 0) as workers_on_state,
    COALESCE(w.remaining_capacity, 0) as remaining_capacity,
    CASE
        WHEN COALESCE(w.remaining_capacity, 0) = 0 AND COALESCE(p.count, 0) > 0 THEN 'no_coverage'
        WHEN COALESCE(w.remaining_capacity, 0) < COALESCE(p.count, 0) THEN 'thin'
        ELSE 'ok'
    END as status
FROM active_states s
LEFT JOIN pending_by_state p ON p.state = s.code
LEFT JOIN workers_by_state w ON w.current_state = s.code
ORDER BY COALESCE(p.count, 0) DESC;
|
||||||
354
backend/migrations/109_worker_identity_pool.sql
Normal file
354
backend/migrations/109_worker_identity_pool.sql
Normal file
@@ -0,0 +1,354 @@
|
|||||||
|
-- Migration: 109_worker_identity_pool.sql
|
||||||
|
-- Description: Identity pool for diverse IP/fingerprint rotation
|
||||||
|
-- Created: 2024-12-14
|
||||||
|
--
|
||||||
|
-- Workers claim identities (IP + fingerprint) from pool.
|
||||||
|
-- Each identity used for 3-5 tasks, then cools down 2-3 hours.
|
||||||
|
-- This creates natural browsing patterns - same person doesn't hit 20 stores.
|
||||||
|
|
||||||
|
-- ============================================================
-- IDENTITY POOL TABLE
-- One row per reusable browsing identity: an Evomi proxy session (IP)
-- paired with a browser fingerprint, geo-pinned to a state/city.
-- Workers claim a row, run a few tasks, then release it into cooldown
-- (see claim_identity / release_identity below).
-- ============================================================
CREATE TABLE IF NOT EXISTS worker_identities (
    id SERIAL PRIMARY KEY,

    -- Evomi session controls the IP
    session_id VARCHAR(100) UNIQUE NOT NULL,

    -- Detected IP from this session
    ip_address INET,

    -- Geo targeting
    state_code VARCHAR(2) NOT NULL,
    city VARCHAR(100), -- City-level targeting for diversity

    -- Fingerprint data (UA, timezone, locale, device, etc.)
    fingerprint JSONB NOT NULL,

    -- Timestamps
    created_at TIMESTAMPTZ DEFAULT NOW(),
    last_used_at TIMESTAMPTZ,
    cooldown_until TIMESTAMPTZ, -- Can't reuse until this time

    -- Usage stats
    total_tasks_completed INT DEFAULT 0,
    total_sessions INT DEFAULT 1, -- How many times this identity has been used

    -- Current state
    is_active BOOLEAN DEFAULT FALSE, -- Currently claimed by a worker
    active_worker_id VARCHAR(100), -- Which worker has it

    -- Health tracking
    consecutive_failures INT DEFAULT 0,
    is_healthy BOOLEAN DEFAULT TRUE -- Set false if IP gets blocked
);

-- Indexes for efficient lookups

-- Geo lookups by state/city
CREATE INDEX IF NOT EXISTS idx_worker_identities_state_city
ON worker_identities(state_code, city);

-- Partial index backing claim_identity's availability scans
CREATE INDEX IF NOT EXISTS idx_worker_identities_available
ON worker_identities(state_code, is_active, cooldown_until)
WHERE is_healthy = TRUE;

-- Partial index for finding identities whose cooldown has expired
CREATE INDEX IF NOT EXISTS idx_worker_identities_cooldown
ON worker_identities(cooldown_until)
WHERE is_healthy = TRUE AND is_active = FALSE;
|
||||||
|
|
||||||
|
-- ============================================================
-- METRO AREA MAPPING
-- For fallback when exact city not available
-- Groups cities into metros so claim_identity can fall back to a
-- sibling city; one row per metro is flagged is_primary.
-- ============================================================
CREATE TABLE IF NOT EXISTS metro_areas (
    id SERIAL PRIMARY KEY,
    metro_name VARCHAR(100) NOT NULL,
    state_code VARCHAR(2) NOT NULL,
    city VARCHAR(100) NOT NULL,
    is_primary BOOLEAN DEFAULT FALSE, -- Primary city of the metro
    UNIQUE(state_code, city)
);

-- Seed data below is idempotent (ON CONFLICT DO NOTHING), so the
-- migration can be re-run safely.

-- Phoenix Metro Area
INSERT INTO metro_areas (metro_name, state_code, city, is_primary) VALUES
('Phoenix Metro', 'AZ', 'Phoenix', TRUE),
('Phoenix Metro', 'AZ', 'Mesa', FALSE),
('Phoenix Metro', 'AZ', 'Glendale', FALSE),
('Phoenix Metro', 'AZ', 'Tempe', FALSE),
('Phoenix Metro', 'AZ', 'Scottsdale', FALSE),
('Phoenix Metro', 'AZ', 'Chandler', FALSE),
('Phoenix Metro', 'AZ', 'Peoria', FALSE),
('Phoenix Metro', 'AZ', 'El Mirage', FALSE),
('Phoenix Metro', 'AZ', 'Tolleson', FALSE),
('Phoenix Metro', 'AZ', 'Sun City', FALSE),
('Phoenix Metro', 'AZ', 'Apache Junction', FALSE),
('Phoenix Metro', 'AZ', 'Cave Creek', FALSE),
('Phoenix Metro', 'AZ', 'Gilbert', FALSE),
('Phoenix Metro', 'AZ', 'Surprise', FALSE),
('Phoenix Metro', 'AZ', 'Avondale', FALSE),
('Phoenix Metro', 'AZ', 'Goodyear', FALSE),
('Phoenix Metro', 'AZ', 'Buckeye', FALSE),
('Phoenix Metro', 'AZ', 'Queen Creek', FALSE)
ON CONFLICT (state_code, city) DO NOTHING;

-- Tucson Metro Area
INSERT INTO metro_areas (metro_name, state_code, city, is_primary) VALUES
('Tucson Metro', 'AZ', 'Tucson', TRUE),
('Tucson Metro', 'AZ', 'Oro Valley', FALSE),
('Tucson Metro', 'AZ', 'Marana', FALSE),
('Tucson Metro', 'AZ', 'Sahuarita', FALSE),
('Tucson Metro', 'AZ', 'South Tucson', FALSE)
ON CONFLICT (state_code, city) DO NOTHING;

-- Flagstaff Area
INSERT INTO metro_areas (metro_name, state_code, city, is_primary) VALUES
('Flagstaff Area', 'AZ', 'Flagstaff', TRUE),
('Flagstaff Area', 'AZ', 'Sedona', FALSE)
ON CONFLICT (state_code, city) DO NOTHING;

-- Prescott Area
INSERT INTO metro_areas (metro_name, state_code, city, is_primary) VALUES
('Prescott Area', 'AZ', 'Prescott', TRUE),
('Prescott Area', 'AZ', 'Prescott Valley', FALSE)
ON CONFLICT (state_code, city) DO NOTHING;
|
||||||
|
|
||||||
|
-- ============================================================
-- FUNCTION: claim_identity
-- Claims an available identity for a worker
-- Tries: exact city -> metro area -> any in state -> create new
-- Each tier atomically flips one idle, healthy, out-of-cooldown row to
-- active; FOR UPDATE SKIP LOCKED keeps concurrent workers from grabbing
-- the same identity. Returns the claimed row, or an all-NULL record when
-- the state has nothing available (caller should create a new identity).
-- ============================================================
CREATE OR REPLACE FUNCTION claim_identity(
    p_worker_id VARCHAR(100),
    p_state_code VARCHAR(2),
    p_city VARCHAR(100) DEFAULT NULL
) RETURNS worker_identities AS $$
DECLARE
    claimed_identity worker_identities;
    metro_name_val VARCHAR(100);
    -- NOTE: removed dead local `primary_city` and its lookup query; the
    -- metro's primary city was fetched but never used.
BEGIN
    -- 1. Try exact city match (if city provided)
    IF p_city IS NOT NULL THEN
        UPDATE worker_identities
        SET is_active = TRUE,
            active_worker_id = p_worker_id,
            last_used_at = NOW()
        WHERE id = (
            SELECT id FROM worker_identities
            WHERE state_code = p_state_code
              AND city = p_city
              AND is_active = FALSE
              AND is_healthy = TRUE
              AND (cooldown_until IS NULL OR cooldown_until < NOW())
            -- Least-recently-used first; never-used rows win outright
            ORDER BY last_used_at ASC NULLS FIRST
            LIMIT 1
            FOR UPDATE SKIP LOCKED
        )
        RETURNING * INTO claimed_identity;

        IF claimed_identity.id IS NOT NULL THEN
            RETURN claimed_identity;
        END IF;
    END IF;

    -- 2. Try metro area fallback: an identity in a sibling city of the
    --    requested city's metro
    IF p_city IS NOT NULL THEN
        -- Find the metro area for this city (NULL when unmapped)
        SELECT ma.metro_name INTO metro_name_val
        FROM metro_areas ma
        WHERE ma.state_code = p_state_code AND ma.city = p_city;

        IF metro_name_val IS NOT NULL THEN
            -- Try any city in same metro
            UPDATE worker_identities wi
            SET is_active = TRUE,
                active_worker_id = p_worker_id,
                last_used_at = NOW()
            WHERE wi.id = (
                SELECT wi2.id FROM worker_identities wi2
                JOIN metro_areas ma ON wi2.city = ma.city AND wi2.state_code = ma.state_code
                WHERE ma.metro_name = metro_name_val
                  AND wi2.is_active = FALSE
                  AND wi2.is_healthy = TRUE
                  AND (wi2.cooldown_until IS NULL OR wi2.cooldown_until < NOW())
                ORDER BY wi2.last_used_at ASC NULLS FIRST
                LIMIT 1
                FOR UPDATE SKIP LOCKED
            )
            RETURNING * INTO claimed_identity;

            IF claimed_identity.id IS NOT NULL THEN
                RETURN claimed_identity;
            END IF;
        END IF;
    END IF;

    -- 3. Try any identity in state
    UPDATE worker_identities
    SET is_active = TRUE,
        active_worker_id = p_worker_id,
        last_used_at = NOW()
    WHERE id = (
        SELECT id FROM worker_identities
        WHERE state_code = p_state_code
          AND is_active = FALSE
          AND is_healthy = TRUE
          AND (cooldown_until IS NULL OR cooldown_until < NOW())
        ORDER BY last_used_at ASC NULLS FIRST
        LIMIT 1
        FOR UPDATE SKIP LOCKED
    )
    RETURNING * INTO claimed_identity;

    -- Return whatever we got (NULL if nothing available - caller should create new)
    RETURN claimed_identity;
END;
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
-- ============================================================
-- FUNCTION: release_identity
-- Releases an identity back to pool with cooldown
-- Marks the identity idle, rolls usage stats forward, and applies a
-- randomized 2-3 hour cooldown so reuse patterns look organic.
-- p_failed feeds the consecutive-failure counter; three straight
-- failures mark the identity unhealthy (likely blocked IP) so the
-- claim functions skip it. A successful release restores health.
-- ============================================================
CREATE OR REPLACE FUNCTION release_identity(
    p_identity_id INT,
    p_tasks_completed INT DEFAULT 0,
    p_failed BOOLEAN DEFAULT FALSE
) RETURNS VOID AS $$
DECLARE
    cooldown_hours FLOAT;
BEGIN
    -- Random cooldown between 2-3 hours for diversity
    cooldown_hours := 2 + random(); -- 2.0 to 3.0 hours

    UPDATE worker_identities
    SET is_active = FALSE,
        active_worker_id = NULL,
        total_tasks_completed = total_tasks_completed + p_tasks_completed,
        total_sessions = total_sessions + 1,
        cooldown_until = NOW() + (cooldown_hours || ' hours')::INTERVAL,
        consecutive_failures = CASE WHEN p_failed THEN consecutive_failures + 1 ELSE 0 END,
        -- BUG FIX: every SET expression reads the OLD row, so the previous
        -- check (consecutive_failures >= 3) only fired on the 4th straight
        -- failure. Evaluate against the post-increment count, and only on a
        -- failing release; a success resets health together with the counter.
        is_healthy = CASE
            WHEN p_failed AND consecutive_failures + 1 >= 3 THEN FALSE
            ELSE TRUE
        END
    WHERE id = p_identity_id;
END;
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
-- ============================================================
-- FUNCTION: get_pending_tasks_by_geo
-- Gets pending tasks grouped by state/city for identity assignment
-- For each geo bucket it also counts identities claimable right now
-- (idle, healthy, out of cooldown; matching city or city-less).
-- Busiest geos first, capped at p_limit rows.
-- ============================================================
CREATE OR REPLACE FUNCTION get_pending_tasks_by_geo(
    p_limit INT DEFAULT 10 -- max number of geo buckets to return
) RETURNS TABLE (
    state_code VARCHAR(2),
    city VARCHAR(100),
    pending_count BIGINT,
    available_identities BIGINT
) AS $$
BEGIN
    RETURN QUERY
    SELECT
        d.state as state_code,
        d.city,
        COUNT(t.id) as pending_count,
        -- Correlated count of claimable identities for this geo.
        -- NOTE(review): runs once per output row; acceptable at p_limit ~10.
        (
            SELECT COUNT(*) FROM worker_identities wi
            WHERE wi.state_code = d.state
              AND (wi.city = d.city OR wi.city IS NULL)
              AND wi.is_active = FALSE
              AND wi.is_healthy = TRUE
              AND (wi.cooldown_until IS NULL OR wi.cooldown_until < NOW())
        ) as available_identities
    FROM worker_tasks t
    JOIN dispensaries d ON t.dispensary_id = d.id
    WHERE t.status = 'pending'
      AND d.state IS NOT NULL
    GROUP BY d.state, d.city
    ORDER BY COUNT(t.id) DESC
    LIMIT p_limit;
END;
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
-- ============================================================
-- FUNCTION: get_tasks_for_identity
-- Gets tasks matching an identity's geo (same city or metro)
-- Preference order: exact city first, then metro siblings. When the
-- city belongs to no known metro, any pending task in the state
-- qualifies (the final OR branch).
-- ============================================================
CREATE OR REPLACE FUNCTION get_tasks_for_identity(
    p_state_code VARCHAR(2),
    p_city VARCHAR(100),
    p_limit INT DEFAULT 5
) RETURNS TABLE (
    task_id INT,
    dispensary_id INT,
    dispensary_name VARCHAR(255),
    dispensary_city VARCHAR(100),
    role VARCHAR(50)
) AS $$
DECLARE
    metro_name_val VARCHAR(100);
BEGIN
    -- Find metro area for this city (stays NULL when the city is unmapped)
    SELECT ma.metro_name INTO metro_name_val
    FROM metro_areas ma
    WHERE ma.state_code = p_state_code AND ma.city = p_city;

    RETURN QUERY
    SELECT
        t.id as task_id,
        d.id as dispensary_id,
        d.name as dispensary_name,
        d.city as dispensary_city,
        t.role
    FROM worker_tasks t
    JOIN dispensaries d ON t.dispensary_id = d.id
    WHERE t.status = 'pending'
      AND d.state = p_state_code
      AND (
          -- Exact city match
          d.city = p_city
          -- Or same metro area
          OR (metro_name_val IS NOT NULL AND d.city IN (
              SELECT ma.city FROM metro_areas ma WHERE ma.metro_name = metro_name_val
          ))
          -- Or any in state if no metro
          OR (metro_name_val IS NULL)
      )
    ORDER BY
        CASE WHEN d.city = p_city THEN 0 ELSE 1 END, -- Prefer exact city
        t.priority DESC,
        t.created_at ASC
    LIMIT p_limit;
END;
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
-- ============================================================
-- VIEW: identity_pool_status
-- Per-(state, city) rollup of the identity pool: total size, how the
-- rows split across active / available / cooling-down / unhealthy,
-- plus lifetime task throughput.
-- ============================================================
CREATE OR REPLACE VIEW identity_pool_status AS
SELECT
    state_code,
    city,
    COUNT(*) AS total_identities,
    -- Claimed by a worker right now
    COUNT(*) FILTER (WHERE is_active) AS active,
    -- Healthy, unclaimed, and past (or without) cooldown: claimable now
    COUNT(*) FILTER (
        WHERE NOT is_active
          AND is_healthy
          AND (cooldown_until IS NULL OR cooldown_until < NOW())
    ) AS available,
    -- Unclaimed but still inside the cooldown window
    COUNT(*) FILTER (
        WHERE NOT is_active
          AND cooldown_until > NOW()
    ) AS cooling_down,
    -- Flagged unhealthy (e.g. blocked IP)
    COUNT(*) FILTER (WHERE NOT is_healthy) AS unhealthy,
    SUM(total_tasks_completed) AS total_tasks,
    AVG(total_tasks_completed)::INT AS avg_tasks_per_identity
FROM worker_identities
GROUP BY state_code, city
ORDER BY state_code, city;
|
||||||
|
|
||||||
|
-- ============================================================
-- Comments
-- Catalog comments (visible via \d+ and admin tooling) describing
-- the identity-pool objects defined above.
-- ============================================================
COMMENT ON TABLE worker_identities IS 'Pool of IP/fingerprint identities for worker rotation';
COMMENT ON TABLE metro_areas IS 'City groupings for geographic fallback matching';
COMMENT ON FUNCTION claim_identity IS 'Claim an available identity: exact city -> metro -> state -> NULL (create new)';
COMMENT ON FUNCTION release_identity IS 'Release identity with 2-3 hour random cooldown';
COMMENT ON FUNCTION get_pending_tasks_by_geo IS 'Get pending task counts by state/city';
COMMENT ON FUNCTION get_tasks_for_identity IS 'Get tasks matching identity geo (city or metro area)';
|
||||||
92
backend/migrations/110_trusted_origins.sql
Normal file
92
backend/migrations/110_trusted_origins.sql
Normal file
@@ -0,0 +1,92 @@
|
|||||||
|
-- Migration: 110_trusted_origins.sql
|
||||||
|
-- Description: Trusted origins for API access without token
|
||||||
|
-- Created: 2024-12-14
|
||||||
|
--
|
||||||
|
-- Manages which domains, IPs, and patterns can access the API without a Bearer token.
|
||||||
|
-- Used by auth middleware to grant 'internal' role to trusted requests.
|
||||||
|
|
||||||
|
-- ============================================================
-- TRUSTED ORIGINS TABLE
-- One row per origin (exact URL, IP, or regex pattern) that the auth
-- middleware treats as trusted, granting the 'internal' role without
-- a Bearer token.
-- ============================================================
CREATE TABLE IF NOT EXISTS trusted_origins (
    id SERIAL PRIMARY KEY,

    -- Origin identification
    name VARCHAR(100) NOT NULL, -- Friendly name (e.g., "CannaIQ Production")
    origin_type VARCHAR(20) NOT NULL, -- 'domain', 'ip', or 'pattern'
    origin_value VARCHAR(255) NOT NULL, -- The actual value to match

    -- Metadata
    description TEXT, -- Optional notes
    active BOOLEAN DEFAULT TRUE,

    -- Tracking
    created_at TIMESTAMPTZ DEFAULT NOW(),
    updated_at TIMESTAMPTZ DEFAULT NOW(), -- maintained by trigger below
    created_by INTEGER REFERENCES users(id),

    -- Constraints
    CONSTRAINT valid_origin_type CHECK (origin_type IN ('domain', 'ip', 'pattern')),
    UNIQUE(origin_type, origin_value)
);

-- Index for active lookups (used by auth middleware)
CREATE INDEX IF NOT EXISTS idx_trusted_origins_active
ON trusted_origins(active) WHERE active = TRUE;
|
||||||
|
|
||||||
|
-- Keep trusted_origins.updated_at current on every row modification.
CREATE OR REPLACE FUNCTION update_trusted_origins_updated_at()
RETURNS TRIGGER AS $$
BEGIN
    -- Stamp the row being written; fires BEFORE UPDATE so the new
    -- value is persisted with the row.
    NEW.updated_at := NOW();
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- Recreate the trigger idempotently so the migration can be re-run.
DROP TRIGGER IF EXISTS trusted_origins_updated_at ON trusted_origins;
CREATE TRIGGER trusted_origins_updated_at
    BEFORE UPDATE ON trusted_origins
    FOR EACH ROW
    EXECUTE FUNCTION update_trusted_origins_updated_at();
|
||||||
|
|
||||||
|
-- ============================================================
-- SEED DEFAULT TRUSTED ORIGINS
-- These match the hardcoded fallbacks in middleware.ts
-- All inserts are ON CONFLICT DO NOTHING, so re-running is safe.
-- ============================================================

-- Production domains
INSERT INTO trusted_origins (name, origin_type, origin_value, description) VALUES
('CannaIQ Production', 'domain', 'https://cannaiq.co', 'Main CannaIQ dashboard'),
('CannaIQ Production (www)', 'domain', 'https://www.cannaiq.co', 'Main CannaIQ dashboard with www'),
('FindADispo Production', 'domain', 'https://findadispo.com', 'Consumer dispensary finder'),
('FindADispo Production (www)', 'domain', 'https://www.findadispo.com', 'Consumer dispensary finder with www'),
('Findagram Production', 'domain', 'https://findagram.co', 'Instagram-style cannabis discovery'),
('Findagram Production (www)', 'domain', 'https://www.findagram.co', 'Instagram-style cannabis discovery with www')
ON CONFLICT (origin_type, origin_value) DO NOTHING;

-- Wildcard patterns (matched as regex by the middleware)
INSERT INTO trusted_origins (name, origin_type, origin_value, description) VALUES
('CannaBrands Subdomains', 'pattern', '^https://.*\\.cannabrands\\.app$', 'All *.cannabrands.app subdomains'),
('CannaIQ Subdomains', 'pattern', '^https://.*\\.cannaiq\\.co$', 'All *.cannaiq.co subdomains')
ON CONFLICT (origin_type, origin_value) DO NOTHING;

-- Local development
INSERT INTO trusted_origins (name, origin_type, origin_value, description) VALUES
('Local API', 'domain', 'http://localhost:3010', 'Local backend API'),
('Local Admin', 'domain', 'http://localhost:8080', 'Local admin dashboard'),
('Local Vite Dev', 'domain', 'http://localhost:5173', 'Vite dev server')
ON CONFLICT (origin_type, origin_value) DO NOTHING;

-- Trusted IPs (localhost)
INSERT INTO trusted_origins (name, origin_type, origin_value, description) VALUES
('Localhost IPv4', 'ip', '127.0.0.1', 'Local machine'),
('Localhost IPv6', 'ip', '::1', 'Local machine IPv6'),
('Localhost IPv6 Mapped', 'ip', '::ffff:127.0.0.1', 'IPv6-mapped IPv4 localhost')
ON CONFLICT (origin_type, origin_value) DO NOTHING;

-- ============================================================
-- COMMENTS
-- ============================================================
COMMENT ON TABLE trusted_origins IS 'Domains, IPs, and patterns that can access API without token';
COMMENT ON COLUMN trusted_origins.origin_type IS 'domain = exact URL match, ip = IP address, pattern = regex pattern';
COMMENT ON COLUMN trusted_origins.origin_value IS 'For domain: full URL. For ip: IP address. For pattern: regex string';
|
||||||
35
backend/migrations/111_system_settings.sql
Normal file
35
backend/migrations/111_system_settings.sql
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
-- Migration: 111_system_settings.sql
-- Description: System settings table for runtime configuration
-- Created: 2024-12-14
-- Simple key/value store consulted at runtime (no redeploy needed to flip).

CREATE TABLE IF NOT EXISTS system_settings (
    key VARCHAR(100) PRIMARY KEY, -- setting name, e.g. 'task_pool_open'
    value TEXT NOT NULL, -- stored as text; callers parse as needed
    description TEXT,
    updated_at TIMESTAMPTZ DEFAULT NOW(), -- maintained by trigger below
    updated_by INTEGER REFERENCES users(id)
);

-- Task pool gate - controls whether workers can claim tasks
INSERT INTO system_settings (key, value, description) VALUES
('task_pool_open', 'true', 'When false, workers cannot claim new tasks from the pool')
ON CONFLICT (key) DO NOTHING;

-- Updated at trigger: stamp updated_at on every row modification
CREATE OR REPLACE FUNCTION update_system_settings_updated_at()
RETURNS TRIGGER AS $$
BEGIN
    NEW.updated_at = NOW();
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- Drop-then-create keeps the migration re-runnable
DROP TRIGGER IF EXISTS system_settings_updated_at ON system_settings;
CREATE TRIGGER system_settings_updated_at
    BEFORE UPDATE ON system_settings
    FOR EACH ROW
    EXECUTE FUNCTION update_system_settings_updated_at();

COMMENT ON TABLE system_settings IS 'Runtime configuration settings';
COMMENT ON COLUMN system_settings.key IS 'Setting name (e.g., task_pool_open)';
COMMENT ON COLUMN system_settings.value IS 'Setting value as string';
|
||||||
390
backend/migrations/112_worker_session_pool.sql
Normal file
390
backend/migrations/112_worker_session_pool.sql
Normal file
@@ -0,0 +1,390 @@
|
|||||||
|
-- Migration 112: Worker Session Pool
-- Tracks IP/fingerprint sessions with exclusive locks and cooldowns
-- Each worker claims up to 6 tasks, uses one IP/fingerprint for those tasks,
-- then retires the session (8hr cooldown before IP can be reused)

-- Drop old identity pool tables if they exist (replacing with simpler session model)
DROP TABLE IF EXISTS worker_identity_claims CASCADE;
DROP TABLE IF EXISTS worker_identities CASCADE;

-- Worker sessions: tracks active and cooling down IP/fingerprint pairs
CREATE TABLE IF NOT EXISTS worker_sessions (
    id SERIAL PRIMARY KEY,

    -- IP and fingerprint for this session
    ip_address VARCHAR(45) NOT NULL, -- 45 chars fits a full IPv6 textual form
    fingerprint_hash VARCHAR(64) NOT NULL,
    fingerprint_data JSONB,

    -- Geo this session is locked to
    state_code VARCHAR(2) NOT NULL,
    city VARCHAR(100),

    -- Ownership
    worker_id VARCHAR(255), -- NULL if in cooldown

    -- Status: 'active' (locked to worker), 'cooldown' (8hr wait), 'available'
    status VARCHAR(20) NOT NULL DEFAULT 'available',

    -- Task tracking
    tasks_claimed INTEGER NOT NULL DEFAULT 0,
    tasks_completed INTEGER NOT NULL DEFAULT 0,
    tasks_failed INTEGER NOT NULL DEFAULT 0,
    max_tasks INTEGER NOT NULL DEFAULT 6,

    -- Timestamps
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    locked_at TIMESTAMPTZ, -- When worker locked this session
    retired_at TIMESTAMPTZ, -- When session was retired (cooldown starts)
    cooldown_until TIMESTAMPTZ, -- When session becomes available again

    -- Constraints
    CONSTRAINT valid_status CHECK (status IN ('active', 'cooldown', 'available'))
);

-- Indexes for fast lookups
CREATE INDEX IF NOT EXISTS idx_worker_sessions_ip ON worker_sessions(ip_address);
CREATE INDEX IF NOT EXISTS idx_worker_sessions_status ON worker_sessions(status);
CREATE INDEX IF NOT EXISTS idx_worker_sessions_worker ON worker_sessions(worker_id) WHERE worker_id IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_worker_sessions_geo ON worker_sessions(state_code, city);
CREATE INDEX IF NOT EXISTS idx_worker_sessions_cooldown ON worker_sessions(cooldown_until) WHERE status = 'cooldown';

-- Unique constraint: only one active session per IP
-- (partial unique index; this is the hard guarantee behind lock_worker_session)
CREATE UNIQUE INDEX IF NOT EXISTS idx_worker_sessions_active_ip
ON worker_sessions(ip_address)
WHERE status = 'active';
|
||||||
|
|
||||||
|
-- Function: Check if IP is available (not active, not in cooldown)
-- An IP is blocked while some session holds it as 'active' or while an
-- unexpired cooldown is pending; anything else (no row, 'available',
-- expired cooldown) means it is free.
CREATE OR REPLACE FUNCTION is_ip_available(check_ip VARCHAR(45))
RETURNS BOOLEAN AS $$
BEGIN
    IF EXISTS (
        SELECT 1
        FROM worker_sessions
        WHERE ip_address = check_ip
          AND (
              status = 'active'
              OR (status = 'cooldown' AND cooldown_until > NOW())
          )
    ) THEN
        RETURN FALSE;
    END IF;

    RETURN TRUE;
END;
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
-- Function: Lock a session to a worker
-- Returns the session if successful, NULL if IP not available
-- Reuses an 'available' row for the same IP when one exists (resetting its
-- per-run counters and re-pinning its geo), otherwise inserts a fresh
-- 'active' row.
-- NOTE(review): the availability check and the insert are not one atomic
-- step; the partial unique index idx_worker_sessions_active_ip is what
-- ultimately prevents two concurrent 'active' sessions on one IP (the
-- loser of a race would hit a unique violation) -- confirm callers handle that.
CREATE OR REPLACE FUNCTION lock_worker_session(
    p_worker_id VARCHAR(255),
    p_ip_address VARCHAR(45),
    p_state_code VARCHAR(2),
    p_city VARCHAR(100) DEFAULT NULL,
    p_fingerprint_hash VARCHAR(64) DEFAULT NULL,
    p_fingerprint_data JSONB DEFAULT NULL
) RETURNS worker_sessions AS $$
DECLARE
    v_session worker_sessions;
BEGIN
    -- First check if IP is available
    IF NOT is_ip_available(p_ip_address) THEN
        RETURN NULL;
    END IF;

    -- Try to find an existing available session for this IP
    SELECT * INTO v_session
    FROM worker_sessions
    WHERE ip_address = p_ip_address
      AND status = 'available'
    FOR UPDATE SKIP LOCKED
    LIMIT 1;

    IF v_session.id IS NOT NULL THEN
        -- Reuse existing session: re-activate and zero the per-run counters
        UPDATE worker_sessions SET
            worker_id = p_worker_id,
            status = 'active',
            state_code = p_state_code,
            city = p_city,
            -- Keep the stored fingerprint when the caller supplies none
            fingerprint_hash = COALESCE(p_fingerprint_hash, fingerprint_hash),
            fingerprint_data = COALESCE(p_fingerprint_data, fingerprint_data),
            tasks_claimed = 0,
            tasks_completed = 0,
            tasks_failed = 0,
            locked_at = NOW(),
            retired_at = NULL,
            cooldown_until = NULL
        WHERE id = v_session.id
        RETURNING * INTO v_session;
    ELSE
        -- Create new session (a random md5 stands in for a missing hash)
        INSERT INTO worker_sessions (
            ip_address, fingerprint_hash, fingerprint_data,
            state_code, city, worker_id, status, locked_at
        ) VALUES (
            p_ip_address, COALESCE(p_fingerprint_hash, md5(random()::text)),
            p_fingerprint_data, p_state_code, p_city, p_worker_id, 'active', NOW()
        )
        RETURNING * INTO v_session;
    END IF;

    RETURN v_session;
END;
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
-- Function: Retire a session (start 8hr cooldown)
-- Unbinds the worker's active session (worker_id -> NULL) so the IP
-- cannot be handed out again until cooldown_until has passed.
-- Returns TRUE when an active session was retired, FALSE otherwise.
CREATE OR REPLACE FUNCTION retire_worker_session(p_worker_id VARCHAR(255))
RETURNS BOOLEAN AS $$
BEGIN
    UPDATE worker_sessions
    SET status = 'cooldown',
        worker_id = NULL,
        retired_at = NOW(),
        cooldown_until = NOW() + INTERVAL '8 hours'
    WHERE worker_id = p_worker_id
      AND status = 'active';

    -- FOUND reflects whether the UPDATE above touched any row
    RETURN FOUND;
END;
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
-- Function: Release expired cooldowns
-- Flips every 'cooldown' session whose cooldown_until has passed back
-- to 'available', and returns the number of rows released.
CREATE OR REPLACE FUNCTION release_expired_sessions()
RETURNS INTEGER AS $$
DECLARE
    v_released INTEGER;
BEGIN
    UPDATE worker_sessions SET
        status = 'available'
    WHERE status = 'cooldown'
      AND cooldown_until <= NOW();

    -- ROW_COUNT of the UPDATE above
    GET DIAGNOSTICS v_released = ROW_COUNT;
    RETURN v_released;
END;
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
-- Function: Get session for worker
-- Plain SQL lookup of the session a worker currently holds 'active';
-- returns an all-NULL record when the worker has none.
CREATE OR REPLACE FUNCTION get_worker_session(p_worker_id VARCHAR(255))
RETURNS worker_sessions AS $$
    SELECT *
    FROM worker_sessions
    WHERE status = 'active'
      AND worker_id = p_worker_id
    LIMIT 1;
$$ LANGUAGE sql;
|
||||||
|
|
||||||
|
-- Function: Increment task counters
-- Record one completed task against the worker's active session.
-- Returns FALSE when the worker holds no active session.
CREATE OR REPLACE FUNCTION session_task_completed(p_worker_id VARCHAR(255))
RETURNS BOOLEAN AS $$
BEGIN
    UPDATE worker_sessions
    SET tasks_completed = tasks_completed + 1
    WHERE status = 'active'
      AND worker_id = p_worker_id;
    RETURN FOUND;
END;
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
-- Record one failed task against the worker's active session.
-- Returns FALSE when the worker holds no active session.
CREATE OR REPLACE FUNCTION session_task_failed(p_worker_id VARCHAR(255))
RETURNS BOOLEAN AS $$
BEGIN
    UPDATE worker_sessions
    SET tasks_failed = tasks_failed + 1
    WHERE status = 'active'
      AND worker_id = p_worker_id;
    RETURN FOUND;
END;
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
-- Add p_count newly claimed tasks to the worker's active session tally.
-- Returns FALSE when the worker holds no active session.
CREATE OR REPLACE FUNCTION session_task_claimed(p_worker_id VARCHAR(255), p_count INTEGER DEFAULT 1)
RETURNS BOOLEAN AS $$
BEGIN
    UPDATE worker_sessions
    SET tasks_claimed = tasks_claimed + p_count
    WHERE status = 'active'
      AND worker_id = p_worker_id;
    RETURN FOUND;
END;
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
-- Scheduled job hint: Run release_expired_sessions() every 5 minutes
-- (stored as a catalog comment so the hint travels with the function)
COMMENT ON FUNCTION release_expired_sessions() IS
'Run periodically to release sessions from cooldown. Suggest: every 5 minutes.';
||||||
|
|
||||||
|
-- =============================================================================
-- ATOMIC TASK CLAIMING
-- Worker claims up to 6 tasks for same geo in one transaction
-- =============================================================================

-- Function: Claim up to N tasks for same geo
-- Picks the (state, city) with the most pending work, then atomically flips
-- up to p_max_tasks of its pending tasks to 'claimed' for this worker.
-- FOR UPDATE SKIP LOCKED makes concurrent workers claim disjoint task sets.
-- Returns claimed tasks with dispensary geo info.
-- (Removed unused local v_claimed_count, which was declared but never read.)
CREATE OR REPLACE FUNCTION claim_tasks_batch(
    p_worker_id VARCHAR(255),
    p_max_tasks INTEGER DEFAULT 6,
    p_role VARCHAR(50) DEFAULT NULL -- Optional role filter
) RETURNS TABLE (
    task_id INTEGER,
    role VARCHAR(50),
    dispensary_id INTEGER,
    dispensary_name VARCHAR(255),
    city VARCHAR(100),
    state_code VARCHAR(2),
    platform VARCHAR(50),
    method VARCHAR(20)
) AS $$
DECLARE
    v_target_state VARCHAR(2);
    v_target_city VARCHAR(100);
BEGIN
    -- First, find the geo with most pending tasks to target
    SELECT d.state, d.city INTO v_target_state, v_target_city
    FROM worker_tasks t
    JOIN dispensaries d ON t.dispensary_id = d.id
    WHERE t.status = 'pending'
      AND (p_role IS NULL OR t.role = p_role)
    GROUP BY d.state, d.city
    ORDER BY COUNT(*) DESC
    LIMIT 1;

    -- No pending tasks anywhere: return an empty set
    IF v_target_state IS NULL THEN
        RETURN;
    END IF;

    -- Claim up to p_max_tasks for this geo.
    -- NOTE(review): when the busiest geo has a NULL city, the city filter
    -- is skipped and the claim spans the whole state.
    RETURN QUERY
    WITH claimed AS (
        UPDATE worker_tasks t SET
            status = 'claimed',
            worker_id = p_worker_id,
            claimed_at = NOW()
        FROM (
            SELECT t2.id
            FROM worker_tasks t2
            JOIN dispensaries d ON t2.dispensary_id = d.id
            WHERE t2.status = 'pending'
              AND d.state = v_target_state
              AND (v_target_city IS NULL OR d.city = v_target_city)
              AND (p_role IS NULL OR t2.role = p_role)
            ORDER BY t2.priority DESC, t2.created_at ASC
            FOR UPDATE SKIP LOCKED
            LIMIT p_max_tasks
        ) sub
        WHERE t.id = sub.id
        RETURNING t.id, t.role, t.dispensary_id, t.method
    )
    SELECT
        c.id AS task_id,
        c.role,
        c.dispensary_id,
        d.name AS dispensary_name,
        d.city,
        d.state AS state_code,
        d.platform,
        c.method
    FROM claimed c
    JOIN dispensaries d ON c.dispensary_id = d.id;
END;
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
-- Function: Release claimed tasks back to pending (for failed worker or cleanup)
|
||||||
|
-- Return every task currently held by a worker (claimed or running) to the
-- pending pool — e.g. after a worker crash or during cleanup.
-- Returns the number of tasks released.
CREATE OR REPLACE FUNCTION release_claimed_tasks(p_worker_id VARCHAR(255))
RETURNS INTEGER AS $$
DECLARE
    v_released INTEGER;
BEGIN
    UPDATE worker_tasks
    SET status = 'pending',
        worker_id = NULL,
        claimed_at = NULL
    WHERE worker_id = p_worker_id
      AND status IN ('claimed', 'running');

    GET DIAGNOSTICS v_released = ROW_COUNT;
    RETURN v_released;
END;
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
-- Function: Mark task as running
|
||||||
|
-- Transition a task from 'claimed' to 'running' and stamp its start time.
-- Only succeeds when the task is still claimed by this worker; returns TRUE
-- on success, FALSE otherwise (wrong worker, wrong state, or unknown id).
CREATE OR REPLACE FUNCTION start_task(p_task_id INTEGER, p_worker_id VARCHAR(255))
RETURNS BOOLEAN AS $$
BEGIN
    UPDATE worker_tasks
    SET status = 'running',
        started_at = NOW()
    WHERE id = p_task_id
      AND worker_id = p_worker_id
      AND status = 'claimed';
    RETURN FOUND;
END;
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
-- Function: Mark task as completed (leaves pool)
|
||||||
|
-- Mark a running task as completed (it leaves the pool), recording the
-- optional JSONB result payload. Only succeeds when the task is currently
-- 'running' under this worker; returns TRUE on success.
CREATE OR REPLACE FUNCTION complete_task(
    p_task_id INTEGER,
    p_worker_id VARCHAR(255),
    p_result JSONB DEFAULT NULL
) RETURNS BOOLEAN AS $$
BEGIN
    UPDATE worker_tasks
    SET status = 'completed',
        completed_at = NOW(),
        result = p_result
    WHERE id = p_task_id
      AND worker_id = p_worker_id
      AND status = 'running';
    RETURN FOUND;
END;
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
-- Function: Mark task as failed (returns to pending for retry)
|
||||||
|
-- Mark a task as failed. While the task still has retries left it returns to
-- the 'pending' pool (claim released); once retry_count reaches p_max_retries
-- it is marked permanently 'failed' and stamped with a completion time.
--
-- The whole decision is made in a single UPDATE so the read of retry_count
-- and the write are atomic; the previous SELECT-then-UPDATE version could
-- race with a concurrent fail/retry of the same task and act on a stale
-- count.
--
-- Returns TRUE when a row owned by p_worker_id was updated.
CREATE OR REPLACE FUNCTION fail_task(
    p_task_id INTEGER,
    p_worker_id VARCHAR(255),
    p_error TEXT DEFAULT NULL,
    p_max_retries INTEGER DEFAULT 3
) RETURNS BOOLEAN AS $$
BEGIN
    UPDATE worker_tasks SET
        status = CASE WHEN COALESCE(retry_count, 0) >= p_max_retries
                      THEN 'failed' ELSE 'pending' END,
        -- Permanent failure stamps completion time; a retry leaves it as-is.
        completed_at = CASE WHEN COALESCE(retry_count, 0) >= p_max_retries
                            THEN NOW() ELSE completed_at END,
        -- A retry releases the claim so any worker can pick the task up again;
        -- a permanent failure keeps the attribution for debugging.
        worker_id = CASE WHEN COALESCE(retry_count, 0) >= p_max_retries
                         THEN worker_id ELSE NULL END,
        claimed_at = CASE WHEN COALESCE(retry_count, 0) >= p_max_retries
                          THEN claimed_at ELSE NULL END,
        started_at = CASE WHEN COALESCE(retry_count, 0) >= p_max_retries
                          THEN started_at ELSE NULL END,
        error_message = p_error,
        retry_count = COALESCE(retry_count, 0) + 1
    WHERE id = p_task_id
      AND worker_id = p_worker_id;

    RETURN FOUND;
END;
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
-- Add retry_count / claimed_at columns if not present.
-- ADD COLUMN IF NOT EXISTS (PostgreSQL 9.6+) replaces the previous
-- information_schema-probing DO block and matches the idempotent style used
-- elsewhere in these migrations.
ALTER TABLE worker_tasks ADD COLUMN IF NOT EXISTS retry_count INTEGER NOT NULL DEFAULT 0;
ALTER TABLE worker_tasks ADD COLUMN IF NOT EXISTS claimed_at TIMESTAMPTZ;
|
||||||
381
backend/migrations/113_task_pools.sql
Normal file
381
backend/migrations/113_task_pools.sql
Normal file
@@ -0,0 +1,381 @@
|
|||||||
|
-- Task Pools: Group tasks by geo area for worker assignment
|
||||||
|
-- Workers claim a pool, get proxy for that geo, then pull tasks from pool
|
||||||
|
|
||||||
|
-- ============================================================================
|
||||||
|
-- TASK POOLS TABLE
|
||||||
|
-- ============================================================================
|
||||||
|
-- Each pool represents a metro area (e.g., Phoenix AZ = 100mi radius)
|
||||||
|
-- Dispensaries are assigned to pools based on location
|
||||||
|
-- Workers claim a pool, not individual tasks
|
||||||
|
|
||||||
|
-- One row per metro-area "pool" (e.g. Phoenix AZ = 100 mile radius).
-- Dispensaries are attached to a pool by location; workers claim a pool
-- rather than individual tasks.
CREATE TABLE IF NOT EXISTS task_pools (
    id           SERIAL PRIMARY KEY,
    name         VARCHAR(100) NOT NULL UNIQUE,   -- machine key, e.g. 'phoenix_az'
    display_name VARCHAR(100) NOT NULL,          -- human label, e.g. 'Phoenix, AZ'
    state_code   VARCHAR(2)   NOT NULL,          -- e.g. 'AZ'
    city         VARCHAR(100) NOT NULL,          -- e.g. 'Phoenix'
    latitude     DECIMAL(10, 6) NOT NULL,        -- pool center latitude
    longitude    DECIMAL(10, 6) NOT NULL,        -- pool center longitude
    radius_miles INTEGER DEFAULT 100,            -- pool radius (100 mi default)
    timezone     VARCHAR(50) NOT NULL,           -- IANA name, e.g. 'America/Phoenix'
    is_active    BOOLEAN DEFAULT true,
    created_at   TIMESTAMPTZ DEFAULT NOW(),
    updated_at   TIMESTAMPTZ DEFAULT NOW()
);

-- Partial index: pool lookups only ever target active pools.
CREATE INDEX IF NOT EXISTS idx_task_pools_active ON task_pools(is_active) WHERE is_active = true;
|
||||||
|
|
||||||
|
-- ============================================================================
|
||||||
|
-- LINK DISPENSARIES TO POOLS
|
||||||
|
-- ============================================================================
|
||||||
|
-- Add pool_id to dispensaries table
|
||||||
|
|
||||||
|
-- Link each dispensary to its geo pool.
ALTER TABLE dispensaries
    ADD COLUMN IF NOT EXISTS pool_id INTEGER REFERENCES task_pools(id);

-- Partial index: only dispensaries that actually belong to a pool.
CREATE INDEX IF NOT EXISTS idx_dispensaries_pool ON dispensaries(pool_id) WHERE pool_id IS NOT NULL;
|
||||||
|
|
||||||
|
-- ============================================================================
|
||||||
|
-- WORKER POOL ASSIGNMENT
|
||||||
|
-- ============================================================================
|
||||||
|
-- Track which pool a worker is currently assigned to
|
||||||
|
|
||||||
|
-- Track the worker's current pool assignment and its per-pool progress.
ALTER TABLE worker_registry
    ADD COLUMN IF NOT EXISTS current_pool_id     INTEGER REFERENCES task_pools(id),  -- pool being worked
    ADD COLUMN IF NOT EXISTS pool_claimed_at     TIMESTAMPTZ,                        -- when the pool was claimed
    ADD COLUMN IF NOT EXISTS pool_stores_visited INTEGER DEFAULT 0,                  -- stores pulled so far
    ADD COLUMN IF NOT EXISTS pool_max_stores     INTEGER DEFAULT 6;                  -- per-pool store budget
|
||||||
|
|
||||||
|
-- ============================================================================
|
||||||
|
-- SEED INITIAL POOLS
|
||||||
|
-- ============================================================================
|
||||||
|
-- Major cannabis markets with approximate center coordinates
|
||||||
|
|
||||||
|
-- Seed the major cannabis markets with approximate metro-center coordinates.
-- Idempotent: ON CONFLICT (name) DO NOTHING makes re-running this migration
-- a no-op for pools that already exist (existing rows are NOT updated).
INSERT INTO task_pools (name, display_name, state_code, city, latitude, longitude, timezone, radius_miles) VALUES
-- Arizona
('phoenix_az', 'Phoenix, AZ', 'AZ', 'Phoenix', 33.4484, -112.0740, 'America/Phoenix', 100),
('tucson_az', 'Tucson, AZ', 'AZ', 'Tucson', 32.2226, -110.9747, 'America/Phoenix', 75),

-- California
('los_angeles_ca', 'Los Angeles, CA', 'CA', 'Los Angeles', 34.0522, -118.2437, 'America/Los_Angeles', 100),
('san_francisco_ca', 'San Francisco, CA', 'CA', 'San Francisco', 37.7749, -122.4194, 'America/Los_Angeles', 75),
('san_diego_ca', 'San Diego, CA', 'CA', 'San Diego', 32.7157, -117.1611, 'America/Los_Angeles', 75),
('sacramento_ca', 'Sacramento, CA', 'CA', 'Sacramento', 38.5816, -121.4944, 'America/Los_Angeles', 75),

-- Colorado
('denver_co', 'Denver, CO', 'CO', 'Denver', 39.7392, -104.9903, 'America/Denver', 100),

-- Illinois
('chicago_il', 'Chicago, IL', 'IL', 'Chicago', 41.8781, -87.6298, 'America/Chicago', 100),

-- Massachusetts
('boston_ma', 'Boston, MA', 'MA', 'Boston', 42.3601, -71.0589, 'America/New_York', 75),

-- Michigan
('detroit_mi', 'Detroit, MI', 'MI', 'Detroit', 42.3314, -83.0458, 'America/Detroit', 100),

-- Nevada
('las_vegas_nv', 'Las Vegas, NV', 'NV', 'Las Vegas', 36.1699, -115.1398, 'America/Los_Angeles', 75),
('reno_nv', 'Reno, NV', 'NV', 'Reno', 39.5296, -119.8138, 'America/Los_Angeles', 50),

-- New Jersey
('newark_nj', 'Newark, NJ', 'NJ', 'Newark', 40.7357, -74.1724, 'America/New_York', 75),

-- New York
('new_york_ny', 'New York, NY', 'NY', 'New York', 40.7128, -74.0060, 'America/New_York', 75),

-- Oklahoma
('oklahoma_city_ok', 'Oklahoma City, OK', 'OK', 'Oklahoma City', 35.4676, -97.5164, 'America/Chicago', 100),
('tulsa_ok', 'Tulsa, OK', 'OK', 'Tulsa', 36.1540, -95.9928, 'America/Chicago', 75),

-- Oregon
('portland_or', 'Portland, OR', 'OR', 'Portland', 45.5152, -122.6784, 'America/Los_Angeles', 75),

-- Washington
('seattle_wa', 'Seattle, WA', 'WA', 'Seattle', 47.6062, -122.3321, 'America/Los_Angeles', 100)

ON CONFLICT (name) DO NOTHING;
|
||||||
|
|
||||||
|
-- ============================================================================
|
||||||
|
-- FUNCTION: Assign dispensary to nearest pool
|
||||||
|
-- ============================================================================
|
||||||
|
-- Assign a dispensary to the nearest active pool whose radius covers it, and
-- write that pool into dispensaries.pool_id. Returns the pool id, or NULL
-- when the dispensary has no coordinates or no pool is in range.
CREATE OR REPLACE FUNCTION assign_dispensary_to_pool(disp_id INTEGER)
RETURNS INTEGER AS $$
DECLARE
    disp_lat DECIMAL(10,6);
    disp_lng DECIMAL(10,6);
    nearest_pool_id INTEGER;
BEGIN
    -- Get dispensary coordinates
    SELECT latitude, longitude INTO disp_lat, disp_lng
    FROM dispensaries WHERE id = disp_id;

    IF disp_lat IS NULL OR disp_lng IS NULL THEN
        RETURN NULL;
    END IF;

    -- Great-circle distance via the spherical law of cosines
    -- (3959 = Earth radius in miles; accurate enough at <= 100 mi).
    -- The LATERAL computes the distance once instead of duplicating the
    -- formula in WHERE and ORDER BY, and the acos() argument is clamped to
    -- [-1, 1]: floating-point rounding can push it fractionally outside that
    -- range for near-identical coordinates, making acos() raise
    -- "input is out of range".
    SELECT p.id INTO nearest_pool_id
    FROM task_pools p
    CROSS JOIN LATERAL (
        SELECT 3959 * acos(LEAST(1.0, GREATEST(-1.0,
            cos(radians(p.latitude)) * cos(radians(disp_lat)) *
            cos(radians(disp_lng) - radians(p.longitude)) +
            sin(radians(p.latitude)) * sin(radians(disp_lat))
        ))) AS miles
    ) dist
    WHERE p.is_active = true
      AND dist.miles <= p.radius_miles
    ORDER BY dist.miles
    LIMIT 1;

    -- Persist the assignment (no-op when nothing is in range).
    IF nearest_pool_id IS NOT NULL THEN
        UPDATE dispensaries SET pool_id = nearest_pool_id WHERE id = disp_id;
    END IF;

    RETURN nearest_pool_id;
END;
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
-- ============================================================================
|
||||||
|
-- FUNCTION: Assign all dispensaries to pools (batch)
|
||||||
|
-- ============================================================================
|
||||||
|
-- Batch-assign every unpooled dispensary (with coordinates) to its nearest
-- pool. Returns one row with counts of how many were assigned vs. left
-- unassigned (no pool within radius, or missing longitude).
CREATE OR REPLACE FUNCTION assign_all_dispensaries_to_pools()
RETURNS TABLE(assigned INTEGER, unassigned INTEGER) AS $$
DECLARE
    assigned_count INTEGER := 0;
    unassigned_count INTEGER := 0;
    disp RECORD;
    -- Renamed from pool_id: a variable named pool_id is ambiguous with the
    -- dispensaries.pool_id column in the loop query below, and plpgsql's
    -- default variable_conflict = error setting raises at runtime.
    v_pool_id INTEGER;
BEGIN
    FOR disp IN
        SELECT id FROM dispensaries
        WHERE pool_id IS NULL AND latitude IS NOT NULL
    LOOP
        v_pool_id := assign_dispensary_to_pool(disp.id);
        IF v_pool_id IS NOT NULL THEN
            assigned_count := assigned_count + 1;
        ELSE
            unassigned_count := unassigned_count + 1;
        END IF;
    END LOOP;

    RETURN QUERY SELECT assigned_count, unassigned_count;
END;
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
-- ============================================================================
|
||||||
|
-- FUNCTION: Get pools with pending tasks
|
||||||
|
-- ============================================================================
|
||||||
|
-- List every active pool that currently has pending tasks, busiest first,
-- together with its pending-task count and the number of distinct stores
-- those tasks belong to.
CREATE OR REPLACE FUNCTION get_pools_with_pending_tasks()
RETURNS TABLE(
    pool_id INTEGER,
    pool_name VARCHAR(100),
    display_name VARCHAR(100),
    state_code VARCHAR(2),
    city VARCHAR(100),
    timezone VARCHAR(50),
    pending_count BIGINT,
    store_count BIGINT
) AS $$
BEGIN
    RETURN QUERY
    SELECT
        tp.id AS pool_id,
        tp.name AS pool_name,
        tp.display_name,
        tp.state_code,
        tp.city,
        tp.timezone,
        COUNT(DISTINCT t.id) AS pending_count,
        COUNT(DISTINCT d.id) AS store_count
    FROM task_pools tp
    JOIN dispensaries d ON d.pool_id = tp.id
    JOIN tasks t ON t.dispensary_id = d.id AND t.status = 'pending'
    WHERE tp.is_active = true
    GROUP BY tp.id, tp.name, tp.display_name, tp.state_code, tp.city, tp.timezone
    -- Defensive; the inner join already guarantees at least one pending task.
    HAVING COUNT(DISTINCT t.id) > 0
    ORDER BY COUNT(DISTINCT t.id) DESC;
END;
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
-- ============================================================================
|
||||||
|
-- FUNCTION: Worker claims a pool
|
||||||
|
-- ============================================================================
|
||||||
|
-- Assign a pool to a worker: either the explicitly requested p_pool_id, or —
-- when NULL — the active pool with the most pending tasks. Resets the
-- worker's per-pool counters and returns the chosen pool's row; returns no
-- rows when no pool could be selected.
CREATE OR REPLACE FUNCTION worker_claim_pool(
    p_worker_id VARCHAR(100),
    p_pool_id INTEGER DEFAULT NULL
)
RETURNS TABLE(
    pool_id INTEGER,
    pool_name VARCHAR(100),
    display_name VARCHAR(100),
    state_code VARCHAR(2),
    city VARCHAR(100),
    latitude DECIMAL(10,6),
    longitude DECIMAL(10,6),
    timezone VARCHAR(50)
) AS $$
DECLARE
    v_pool_id INTEGER := p_pool_id;
BEGIN
    -- No explicit pool requested: pick the busiest active one.
    IF v_pool_id IS NULL THEN
        SELECT tp.id INTO v_pool_id
        FROM task_pools tp
        JOIN dispensaries d ON d.pool_id = tp.id
        JOIN tasks t ON t.dispensary_id = d.id AND t.status = 'pending'
        WHERE tp.is_active = true
        GROUP BY tp.id
        ORDER BY COUNT(DISTINCT t.id) DESC
        LIMIT 1;
    END IF;

    -- No active pool has pending work.
    IF v_pool_id IS NULL THEN
        RETURN;
    END IF;

    -- Record the assignment and reset per-pool progress counters.
    UPDATE worker_registry
    SET current_pool_id = v_pool_id,
        pool_claimed_at = NOW(),
        pool_stores_visited = 0,
        pool_max_stores = 6,
        updated_at = NOW()
    WHERE worker_id = p_worker_id;

    -- Hand the pool's details back to the worker.
    RETURN QUERY
    SELECT
        tp.id,
        tp.name,
        tp.display_name,
        tp.state_code,
        tp.city,
        tp.latitude,
        tp.longitude,
        tp.timezone
    FROM task_pools tp
    WHERE tp.id = v_pool_id;
END;
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
-- ============================================================================
|
||||||
|
-- FUNCTION: Pull tasks from worker's pool (up to 6 stores)
|
||||||
|
-- ============================================================================
|
||||||
|
-- Claim at most one pending task per store in the worker's current pool,
-- limited by the worker's remaining store budget
-- (pool_max_stores - pool_stores_visited). Returns the claimed tasks and
-- advances pool_stores_visited by the number of stores claimed by THIS call.
--
-- Raises when the worker has no pool assigned; returns no rows when the
-- store budget is exhausted.
CREATE OR REPLACE FUNCTION pull_tasks_from_pool(
    p_worker_id VARCHAR(100),
    p_max_stores INTEGER DEFAULT 6
)
RETURNS TABLE(
    task_id INTEGER,
    dispensary_id INTEGER,
    dispensary_name VARCHAR(255),
    role VARCHAR(50),
    platform VARCHAR(50),
    method VARCHAR(20)
) AS $$
DECLARE
    worker_pool_id INTEGER;
    stores_visited INTEGER;
    max_stores INTEGER;
    stores_remaining INTEGER;
    v_claimed_stores INTEGER := 0;
BEGIN
    -- Get worker's current pool and store count
    SELECT current_pool_id, pool_stores_visited, pool_max_stores
    INTO worker_pool_id, stores_visited, max_stores
    FROM worker_registry
    WHERE worker_id = p_worker_id;

    IF worker_pool_id IS NULL THEN
        RAISE EXCEPTION 'Worker % has no pool assigned', p_worker_id;
    END IF;

    stores_remaining := max_stores - stores_visited;
    IF stores_remaining <= 0 THEN
        RETURN; -- Worker exhausted
    END IF;

    -- Claim tasks from pool (one task per store, up to remaining capacity)
    RETURN QUERY
    WITH available_stores AS (
        -- One candidate task per store: highest priority, oldest first.
        SELECT DISTINCT ON (d.id)
            t.id AS task_id,
            d.id AS dispensary_id,
            d.name AS dispensary_name,
            t.role,
            t.platform,
            t.method
        FROM tasks t
        JOIN dispensaries d ON d.id = t.dispensary_id
        WHERE d.pool_id = worker_pool_id
          AND t.status = 'pending'
          AND t.scheduled_for <= NOW()
        ORDER BY d.id, t.priority DESC, t.created_at ASC
        LIMIT stores_remaining
    ),
    claimed AS (
        UPDATE tasks
        SET status = 'claimed',
            claimed_by = p_worker_id,
            claimed_at = NOW()
        -- s.task_id must be qualified: a bare task_id here is ambiguous with
        -- the function's task_id OUT column and errors under plpgsql's
        -- default variable_conflict = error setting.
        WHERE id IN (SELECT s.task_id FROM available_stores s)
          -- Re-check status so a task grabbed by a concurrent worker between
          -- the CTE read and this UPDATE is not claimed twice.
          AND status = 'pending'
        RETURNING id
    )
    SELECT
        av.task_id,
        av.dispensary_id,
        av.dispensary_name,
        av.role,
        av.platform,
        av.method
    FROM available_stores av
    WHERE av.task_id IN (SELECT c.id FROM claimed c);

    -- RETURN QUERY sets ROW_COUNT (PostgreSQL 13+ — confirm server version).
    -- Each returned row is a distinct store (DISTINCT ON d.id above), so the
    -- row count is the number of stores claimed by this call. The previous
    -- version re-counted every task the worker currently held in 'claimed'
    -- status, double-counting stores from earlier pulls.
    GET DIAGNOSTICS v_claimed_stores = ROW_COUNT;

    UPDATE worker_registry
    SET pool_stores_visited = pool_stores_visited + v_claimed_stores,
        updated_at = NOW()
    WHERE worker_id = p_worker_id;
END;
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
-- ============================================================================
|
||||||
|
-- FUNCTION: Worker releases pool (exhausted or done)
|
||||||
|
-- ============================================================================
|
||||||
|
-- Clear a worker's pool assignment and geo context (pool exhausted or work
-- done) so it can claim a fresh pool. Always returns TRUE — the UPDATE is a
-- no-op for an unknown worker_id.
CREATE OR REPLACE FUNCTION worker_release_pool(p_worker_id VARCHAR(100))
RETURNS BOOLEAN AS $$
BEGIN
    UPDATE worker_registry
    SET current_pool_id = NULL,
        pool_claimed_at = NULL,
        pool_stores_visited = 0,
        current_state = NULL,
        current_city = NULL,
        updated_at = NOW()
    WHERE worker_id = p_worker_id;

    RETURN true;
END;
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
-- ============================================================================
-- RUN: Assign existing dispensaries to pools
-- ============================================================================
-- One-shot backfill executed at migration time. Safe to re-run: the batch
-- function only touches dispensaries where pool_id IS NULL.
SELECT * FROM assign_all_dispensaries_to_pools();
|
||||||
10
backend/migrations/114_schedule_pool_id.sql
Normal file
10
backend/migrations/114_schedule_pool_id.sql
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
-- Migration 114: Add pool_id to task_schedules
-- Lets a schedule target a single geo pool instead of every dispensary
-- matching its state_code.
ALTER TABLE task_schedules
    ADD COLUMN IF NOT EXISTS pool_id INTEGER REFERENCES task_pools(id);

-- Partial index: most schedules carry no pool filter, so only index the ones
-- that do.
CREATE INDEX IF NOT EXISTS idx_task_schedules_pool ON task_schedules(pool_id) WHERE pool_id IS NOT NULL;

COMMENT ON COLUMN task_schedules.pool_id IS 'Optional geo pool filter. NULL = all pools/dispensaries matching state_code';
|
||||||
17
backend/migrations/115_task_proxy_ip.sql
Normal file
17
backend/migrations/115_task_proxy_ip.sql
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
-- Migration: Add proxy_ip tracking to worker_tasks
--
-- Anti-detection measure: Dutchie/Jane may flag a single IP that makes
-- requests for multiple different stores. Recording the proxy IP per task
-- lets us check "is this IP already hitting another store on this platform?"
-- before assigning work.

-- VARCHAR(45) is wide enough for a full IPv6 textual address.
ALTER TABLE worker_tasks ADD COLUMN IF NOT EXISTS proxy_ip VARCHAR(45);

-- Partial index over in-flight tasks only: the lookup is always
-- (proxy_ip, platform) for active work, so keep the index small.
CREATE INDEX IF NOT EXISTS idx_worker_tasks_proxy_ip_active
    ON worker_tasks (proxy_ip, platform)
    WHERE status IN ('claimed', 'running') AND proxy_ip IS NOT NULL;

COMMENT ON COLUMN worker_tasks.proxy_ip IS 'Proxy IP assigned to this task. Used to prevent same IP hitting multiple stores on same platform.';
|
||||||
16
backend/migrations/116_task_source_tracking.sql
Normal file
16
backend/migrations/116_task_source_tracking.sql
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
-- Migration: Add source tracking columns to worker_tasks
-- Records where each task originated (schedule, API, manual, chain).
ALTER TABLE worker_tasks
    ADD COLUMN IF NOT EXISTS source VARCHAR(50),
    ADD COLUMN IF NOT EXISTS source_schedule_id INTEGER REFERENCES task_schedules(id),
    ADD COLUMN IF NOT EXISTS source_metadata JSONB;

-- Partial index for "which tasks did this schedule create?" lookups.
CREATE INDEX IF NOT EXISTS idx_worker_tasks_source_schedule
    ON worker_tasks (source_schedule_id) WHERE source_schedule_id IS NOT NULL;

COMMENT ON COLUMN worker_tasks.source IS 'Origin of task: schedule, api, manual, chain';
COMMENT ON COLUMN worker_tasks.source_schedule_id IS 'ID of schedule that created this task';
COMMENT ON COLUMN worker_tasks.source_metadata IS 'Additional metadata about task origin';
|
||||||
32
backend/migrations/117_per_store_crawl_interval.sql
Normal file
32
backend/migrations/117_per_store_crawl_interval.sql
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
-- Migration 117: Per-store crawl interval scheduling
-- Configurable per-store crawl cadence plus change-tracking counters.
-- Part of the Real-Time Inventory Tracking feature.
ALTER TABLE dispensaries
    -- NULL interval = fall back to the state schedule default (4h).
    ADD COLUMN IF NOT EXISTS crawl_interval_minutes INT DEFAULT NULL,
    -- Next due time used by the high-frequency scheduler.
    ADD COLUMN IF NOT EXISTS next_crawl_at TIMESTAMPTZ DEFAULT NULL,
    -- Last task-creation time, used to enforce minimum request spacing.
    ADD COLUMN IF NOT EXISTS last_crawl_started_at TIMESTAMPTZ DEFAULT NULL,
    -- Change-detection hashes and rolling 24h change counters.
    ADD COLUMN IF NOT EXISTS last_inventory_hash TEXT DEFAULT NULL,
    ADD COLUMN IF NOT EXISTS last_price_hash TEXT DEFAULT NULL,
    ADD COLUMN IF NOT EXISTS inventory_changes_24h INT DEFAULT 0,
    ADD COLUMN IF NOT EXISTS price_changes_24h INT DEFAULT 0;

-- Scheduler query support: stores due for a high-frequency crawl.
CREATE INDEX IF NOT EXISTS idx_dispensaries_next_crawl
    ON dispensaries(next_crawl_at)
    WHERE crawl_interval_minutes IS NOT NULL AND crawl_enabled = TRUE;

COMMENT ON COLUMN dispensaries.crawl_interval_minutes IS 'Custom crawl interval in minutes. NULL = use state schedule (4h default). Set to 15/30/60 for high-frequency tracking.';
COMMENT ON COLUMN dispensaries.next_crawl_at IS 'When this store should next be crawled. Updated after each crawl with interval + jitter.';
COMMENT ON COLUMN dispensaries.last_crawl_started_at IS 'When the last crawl task was created. Used to enforce minimum spacing.';
COMMENT ON COLUMN dispensaries.last_inventory_hash IS 'Hash of inventory state from last crawl. Used to detect changes and skip unchanged payloads.';
COMMENT ON COLUMN dispensaries.last_price_hash IS 'Hash of price state from last crawl. Used to detect price changes.';
COMMENT ON COLUMN dispensaries.inventory_changes_24h IS 'Number of inventory changes detected in last 24h. Indicates store volatility.';
COMMENT ON COLUMN dispensaries.price_changes_24h IS 'Number of price changes detected in last 24h.';
|
||||||
48
backend/migrations/118_inventory_snapshots.sql
Normal file
48
backend/migrations/118_inventory_snapshots.sql
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
-- Migration 118: Inventory snapshots table
-- Lightweight per-product tracking for sales velocity estimation.
-- Part of Real-Time Inventory Tracking feature.
CREATE TABLE IF NOT EXISTS inventory_snapshots (
    id BIGSERIAL PRIMARY KEY,
    dispensary_id INT NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE,
    product_id TEXT NOT NULL,                -- provider_product_id (normalized across platforms)
    captured_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),

    -- Platform (for debugging/filtering)
    platform TEXT NOT NULL,                  -- 'dutchie' | 'jane' | 'treez'

    -- Inventory fields (normalized from all platforms)
    quantity_available INT,                  -- Dutchie: quantityAvailable, Jane: quantity, Treez: quantityAvailable
    is_below_threshold BOOLEAN,              -- Dutchie: isBelowThreshold, Jane: computed, Treez: lowInventory
    status TEXT,                             -- Active/Inactive/available

    -- Price fields (normalized)
    price_rec NUMERIC(10,2),                 -- recreational price
    price_med NUMERIC(10,2),                 -- medical price (if different)

    -- Denormalized for fast queries
    brand_name TEXT,
    category TEXT,
    product_name TEXT
);

-- All indexes use IF NOT EXISTS so the migration is re-runnable, matching
-- the CREATE TABLE IF NOT EXISTS above (previously the last index had it but
-- the first four did not, so a re-run failed on a duplicate index).

-- Primary query: get snapshots for a store over time
CREATE INDEX IF NOT EXISTS idx_inv_snap_store_time ON inventory_snapshots(dispensary_id, captured_at DESC);

-- Delta calculation: get consecutive snapshots for a product
CREATE INDEX IF NOT EXISTS idx_inv_snap_product_time ON inventory_snapshots(dispensary_id, product_id, captured_at DESC);

-- Brand-level analytics
CREATE INDEX IF NOT EXISTS idx_inv_snap_brand_time ON inventory_snapshots(brand_name, captured_at DESC) WHERE brand_name IS NOT NULL;

-- Platform filtering
CREATE INDEX IF NOT EXISTS idx_inv_snap_platform ON inventory_snapshots(platform, captured_at DESC);

-- Retention cleanup (30 days) - simple index, cleanup job handles the WHERE
CREATE INDEX IF NOT EXISTS idx_inv_snap_cleanup ON inventory_snapshots(captured_at);

COMMENT ON TABLE inventory_snapshots IS 'Lightweight inventory snapshots for sales velocity tracking. Retained 30 days.';
COMMENT ON COLUMN inventory_snapshots.product_id IS 'Provider product ID, normalized across platforms';
COMMENT ON COLUMN inventory_snapshots.platform IS 'Menu platform: dutchie, jane, or treez';
COMMENT ON COLUMN inventory_snapshots.quantity_available IS 'Current quantity in stock (Dutchie: quantityAvailable, Jane: quantity)';
|
||||||
53
backend/migrations/119_product_visibility_events.sql
Normal file
53
backend/migrations/119_product_visibility_events.sql
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
-- Migration 119: Product visibility events table
-- Tracks OOS, brand drops, and other notable events for alerts.
-- Part of Real-Time Inventory Tracking feature.
CREATE TABLE IF NOT EXISTS product_visibility_events (
    id SERIAL PRIMARY KEY,
    dispensary_id INT NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE,

    -- Product identification (null for brand-level events)
    product_id TEXT,                         -- provider_product_id
    product_name TEXT,                       -- For display in alerts

    -- Brand (always populated)
    brand_name TEXT,

    -- Event details
    event_type TEXT NOT NULL,                -- 'oos', 'back_in_stock', 'brand_dropped', 'brand_added', 'price_change'
    detected_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),

    -- Context
    previous_quantity INT,                   -- For OOS events: what quantity was before
    previous_price NUMERIC(10,2),            -- For price change events
    new_price NUMERIC(10,2),                 -- For price change events
    price_change_pct NUMERIC(5,2),           -- Percentage change (e.g., -15.5 for 15.5% decrease)

    -- Platform
    platform TEXT,                           -- 'dutchie' | 'jane' | 'treez'

    -- Alert status
    notified BOOLEAN DEFAULT FALSE,          -- Has external system been notified?
    acknowledged_at TIMESTAMPTZ,             -- When user acknowledged the alert
    acknowledged_by TEXT                     -- User who acknowledged
);

-- All indexes use IF NOT EXISTS so the migration is re-runnable, matching
-- the CREATE TABLE IF NOT EXISTS above (previously only the cleanup index
-- had it, so a re-run failed on the first duplicate index).

-- Primary query: recent events by store
CREATE INDEX IF NOT EXISTS idx_vis_events_store_time ON product_visibility_events(dispensary_id, detected_at DESC);

-- Alert queries: unnotified events
CREATE INDEX IF NOT EXISTS idx_vis_events_unnotified ON product_visibility_events(notified, detected_at DESC) WHERE notified = FALSE;

-- Event type filtering
CREATE INDEX IF NOT EXISTS idx_vis_events_type ON product_visibility_events(event_type, detected_at DESC);

-- Brand-level queries
CREATE INDEX IF NOT EXISTS idx_vis_events_brand ON product_visibility_events(brand_name, event_type, detected_at DESC) WHERE brand_name IS NOT NULL;

-- Cleanup (90 days retention) - simple index, cleanup job handles the WHERE
CREATE INDEX IF NOT EXISTS idx_vis_events_cleanup ON product_visibility_events(detected_at);

COMMENT ON TABLE product_visibility_events IS 'Notable inventory events for alerting. OOS, brand drops, significant price changes. Retained 90 days.';
COMMENT ON COLUMN product_visibility_events.event_type IS 'Event type: oos (out of stock), back_in_stock, brand_dropped, brand_added, price_change';
COMMENT ON COLUMN product_visibility_events.notified IS 'Whether external systems (other apps) have been notified of this event';
|
||||||
13
backend/migrations/120_daily_baseline_tracking.sql
Normal file
13
backend/migrations/120_daily_baseline_tracking.sql
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
-- Migration 120: Daily baseline tracking
-- Records when each store's daily baseline payload was last persisted.
-- Part of the Real-Time Inventory Tracking feature.

-- Most recent baseline save per dispensary (NULL = never saved).
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS last_baseline_at TIMESTAMPTZ DEFAULT NULL;

-- Partial index for the scheduler query: "which crawl-enabled stores
-- still need a baseline today?"
CREATE INDEX IF NOT EXISTS idx_dispensaries_baseline ON dispensaries(last_baseline_at)
    WHERE crawl_enabled = TRUE;

COMMENT ON COLUMN dispensaries.last_baseline_at IS 'Timestamp of last daily baseline payload save. Baselines saved once per day between 12:01 AM - 3:00 AM.';
|
||||||
383
backend/migrations/121_sales_analytics_views.sql
Normal file
383
backend/migrations/121_sales_analytics_views.sql
Normal file
@@ -0,0 +1,383 @@
|
|||||||
|
-- Migration 121: Sales Analytics Materialized Views
-- Pre-computed views for sales velocity, brand market share, and store performance

-- ============================================================
-- VIEW 1: Daily Sales Estimates (per product/store)
-- Derives units sold/restocked from the delta between consecutive
-- inventory snapshots over the trailing 30 days.
-- ============================================================
CREATE MATERIALIZED VIEW IF NOT EXISTS mv_daily_sales_estimates AS
WITH qty_deltas AS (
    -- Pair each snapshot with the previous quantity for the same product/store.
    SELECT
        dispensary_id,
        product_id,
        brand_name,
        category,
        DATE(captured_at) AS sale_date,
        price_rec,
        quantity_available,
        LAG(quantity_available) OVER (
            PARTITION BY dispensary_id, product_id
            ORDER BY captured_at
        ) AS prev_quantity
    FROM inventory_snapshots
    WHERE quantity_available IS NOT NULL
      AND captured_at >= NOW() - INTERVAL '30 days'
)
SELECT
    dispensary_id,
    product_id,
    brand_name,
    category,
    sale_date,
    AVG(price_rec) AS avg_price,
    -- prev_quantity is guaranteed non-NULL by the WHERE clause below, so the
    -- original COALESCE(prev_quantity, 0) was dead code and has been removed.
    -- Quantity drops count as sales; quantity rises count as restocks.
    SUM(GREATEST(0, prev_quantity - quantity_available)) AS units_sold,
    SUM(GREATEST(0, quantity_available - prev_quantity)) AS units_restocked,
    -- Revenue estimate: units sold priced at the row's rec price (0 if price unknown).
    SUM(GREATEST(0, prev_quantity - quantity_available) * COALESCE(price_rec, 0)) AS revenue_estimate,
    COUNT(*) AS snapshot_count
FROM qty_deltas
WHERE prev_quantity IS NOT NULL  -- the first snapshot of each product has no delta
GROUP BY dispensary_id, product_id, brand_name, category, sale_date;

-- A unique index is required for REFRESH MATERIALIZED VIEW CONCURRENTLY.
CREATE UNIQUE INDEX IF NOT EXISTS idx_mv_daily_sales_pk
    ON mv_daily_sales_estimates(dispensary_id, product_id, sale_date);
CREATE INDEX IF NOT EXISTS idx_mv_daily_sales_brand
    ON mv_daily_sales_estimates(brand_name, sale_date);
CREATE INDEX IF NOT EXISTS idx_mv_daily_sales_category
    ON mv_daily_sales_estimates(category, sale_date);
CREATE INDEX IF NOT EXISTS idx_mv_daily_sales_date
    ON mv_daily_sales_estimates(sale_date DESC);
|
||||||
|
|
||||||
|
|
||||||
|
-- ============================================================
-- VIEW 2: Brand Market Share by State
-- Weighted distribution across stores
-- ============================================================
CREATE MATERIALIZED VIEW IF NOT EXISTS mv_brand_market_share AS
WITH brand_presence AS (
    -- Per-brand footprint within each state.
    SELECT
        sp.brand AS brand_name,
        d.state AS state_code,
        COUNT(DISTINCT sp.dispensary_id) AS stores_carrying,
        COUNT(*) AS sku_count,
        SUM(CASE WHEN sp.is_in_stock THEN 1 ELSE 0 END) AS in_stock_skus,
        AVG(sp.price_rec) AS avg_price
    FROM store_products sp
    JOIN dispensaries d ON d.id = sp.dispensary_id
    WHERE sp.brand IS NOT NULL
      AND d.state IS NOT NULL
      -- Fix: restrict the numerator to crawl-enabled stores so it matches the
      -- denominator population in state_totals; previously penetration_pct
      -- could exceed 100% when disabled stores carried the brand.
      AND d.crawl_enabled = TRUE
    GROUP BY sp.brand, d.state
),
state_totals AS (
    -- Denominator: crawl-enabled store count per state.
    SELECT
        d.state AS state_code,
        COUNT(DISTINCT d.id) FILTER (WHERE d.crawl_enabled) AS total_stores
    FROM dispensaries d
    WHERE d.state IS NOT NULL
    GROUP BY d.state
)
SELECT
    bp.brand_name,
    bp.state_code,
    bp.stores_carrying,
    st.total_stores,
    -- NULLIF guards against division by zero for states with no enabled stores.
    ROUND(bp.stores_carrying::NUMERIC * 100 / NULLIF(st.total_stores, 0), 2) AS penetration_pct,
    bp.sku_count,
    bp.in_stock_skus,
    bp.avg_price,
    NOW() AS calculated_at
FROM brand_presence bp
JOIN state_totals st ON st.state_code = bp.state_code;

-- Unique index required for concurrent refresh.
CREATE UNIQUE INDEX IF NOT EXISTS idx_mv_brand_market_pk
    ON mv_brand_market_share(brand_name, state_code);
CREATE INDEX IF NOT EXISTS idx_mv_brand_market_state
    ON mv_brand_market_share(state_code);
CREATE INDEX IF NOT EXISTS idx_mv_brand_market_penetration
    ON mv_brand_market_share(penetration_pct DESC);
|
||||||
|
|
||||||
|
|
||||||
|
-- ============================================================
-- VIEW 3: SKU Velocity (30-day rolling)
-- Average daily units sold per SKU
-- ============================================================
CREATE MATERIALIZED VIEW IF NOT EXISTS mv_sku_velocity AS
WITH per_sku AS (
    -- Aggregate 30-day sales per SKU/store; daily_rate is computed once here
    -- instead of being repeated in every tier comparison.
    SELECT
        dse.product_id,
        dse.brand_name,
        dse.category,
        dse.dispensary_id,
        d.name AS dispensary_name,
        d.state AS state_code,
        SUM(dse.units_sold) AS total_units_30d,
        SUM(dse.revenue_estimate) AS total_revenue_30d,
        COUNT(DISTINCT dse.sale_date) AS days_with_sales,
        SUM(dse.units_sold)::NUMERIC / NULLIF(COUNT(DISTINCT dse.sale_date), 0) AS daily_rate,
        AVG(dse.avg_price) AS avg_price
    FROM mv_daily_sales_estimates dse
    JOIN dispensaries d ON d.id = dse.dispensary_id
    WHERE dse.sale_date >= CURRENT_DATE - INTERVAL '30 days'
    GROUP BY dse.product_id, dse.brand_name, dse.category, dse.dispensary_id, d.name, d.state
)
SELECT
    product_id,
    brand_name,
    category,
    dispensary_id,
    dispensary_name,
    state_code,
    total_units_30d,
    total_revenue_30d,
    days_with_sales,
    -- Rounded for display; tier thresholds below use the unrounded rate,
    -- exactly as the original repeated-expression version did.
    ROUND(daily_rate, 2) AS avg_daily_units,
    avg_price,
    CASE
        WHEN daily_rate >= 5 THEN 'hot'
        WHEN daily_rate >= 1 THEN 'steady'
        WHEN daily_rate >= 0.1 THEN 'slow'
        ELSE 'stale'
    END AS velocity_tier,
    NOW() AS calculated_at
FROM per_sku;

-- Unique index required for concurrent refresh.
CREATE UNIQUE INDEX IF NOT EXISTS idx_mv_sku_velocity_pk
    ON mv_sku_velocity(dispensary_id, product_id);
CREATE INDEX IF NOT EXISTS idx_mv_sku_velocity_brand
    ON mv_sku_velocity(brand_name);
CREATE INDEX IF NOT EXISTS idx_mv_sku_velocity_tier
    ON mv_sku_velocity(velocity_tier);
CREATE INDEX IF NOT EXISTS idx_mv_sku_velocity_state
    ON mv_sku_velocity(state_code);
CREATE INDEX IF NOT EXISTS idx_mv_sku_velocity_units
    ON mv_sku_velocity(total_units_30d DESC);
|
||||||
|
|
||||||
|
|
||||||
|
-- ============================================================
-- VIEW 4: Store Performance Rankings
-- Revenue estimates and brand diversity per store
-- ============================================================
CREATE MATERIALIZED VIEW IF NOT EXISTS mv_store_performance AS
SELECT
    d.id AS dispensary_id,
    d.name AS dispensary_name,
    d.city,
    d.state AS state_code,
    -- Revenue metrics (COALESCE because stores may have no sales rows in 30d).
    COALESCE(sales.total_revenue_30d, 0) AS total_revenue_30d,
    COALESCE(sales.total_units_30d, 0) AS total_units_30d,
    -- Inventory breadth.
    COUNT(DISTINCT sp.id) AS total_skus,
    COUNT(DISTINCT sp.id) FILTER (WHERE sp.is_in_stock) AS in_stock_skus,
    -- Assortment diversity.
    COUNT(DISTINCT sp.brand) AS unique_brands,
    COUNT(DISTINCT sp.category) AS unique_categories,
    -- Pricing.
    AVG(sp.price_rec) AS avg_price,
    -- Recency of menu data.
    MAX(sp.updated_at) AS last_updated,
    NOW() AS calculated_at
FROM dispensaries d
LEFT JOIN store_products sp ON sp.dispensary_id = d.id
LEFT JOIN (
    -- Pre-aggregated 30-day sales per store from the daily estimates view.
    SELECT
        dispensary_id,
        SUM(revenue_estimate) AS total_revenue_30d,
        SUM(units_sold) AS total_units_30d
    FROM mv_daily_sales_estimates
    WHERE sale_date >= CURRENT_DATE - INTERVAL '30 days'
    GROUP BY dispensary_id
) sales ON sales.dispensary_id = d.id
WHERE d.crawl_enabled = TRUE
GROUP BY d.id, d.name, d.city, d.state, sales.total_revenue_30d, sales.total_units_30d;

-- Unique index required for concurrent refresh.
CREATE UNIQUE INDEX IF NOT EXISTS idx_mv_store_perf_pk
    ON mv_store_performance(dispensary_id);
CREATE INDEX IF NOT EXISTS idx_mv_store_perf_state
    ON mv_store_performance(state_code);
CREATE INDEX IF NOT EXISTS idx_mv_store_perf_revenue
    ON mv_store_performance(total_revenue_30d DESC);
|
||||||
|
|
||||||
|
|
||||||
|
-- ============================================================
-- VIEW 5: Weekly Category Trends
-- Category performance over time
-- ============================================================
CREATE MATERIALIZED VIEW IF NOT EXISTS mv_category_weekly_trends AS
SELECT
    dse.category,
    d.state AS state_code,
    -- Bucket sales into ISO weeks (cast to DATE for a stable key).
    DATE_TRUNC('week', dse.sale_date)::DATE AS week_start,
    COUNT(DISTINCT dse.product_id) AS sku_count,
    COUNT(DISTINCT dse.dispensary_id) AS store_count,
    SUM(dse.units_sold) AS total_units,
    SUM(dse.revenue_estimate) AS total_revenue,
    AVG(dse.avg_price) AS avg_price,
    NOW() AS calculated_at
FROM mv_daily_sales_estimates dse
JOIN dispensaries d ON d.id = dse.dispensary_id
WHERE dse.category IS NOT NULL
  AND dse.sale_date >= CURRENT_DATE - INTERVAL '90 days'
GROUP BY dse.category, d.state, DATE_TRUNC('week', dse.sale_date);

-- Unique index required for concurrent refresh.
CREATE UNIQUE INDEX IF NOT EXISTS idx_mv_cat_weekly_pk
    ON mv_category_weekly_trends(category, state_code, week_start);
CREATE INDEX IF NOT EXISTS idx_mv_cat_weekly_state
    ON mv_category_weekly_trends(state_code, week_start);
CREATE INDEX IF NOT EXISTS idx_mv_cat_weekly_date
    ON mv_category_weekly_trends(week_start DESC);
|
||||||
|
|
||||||
|
|
||||||
|
-- ============================================================
-- VIEW 6: Product Intelligence (Hoodie-style per-product metrics)
-- Includes stock diff, days since OOS, days until stockout
-- ============================================================
CREATE MATERIALIZED VIEW IF NOT EXISTS mv_product_intelligence AS
WITH
-- Oldest vs. newest quantity per product over the last 120 days.
-- Frame is forced to the whole partition so FIRST_VALUE/LAST_VALUE are
-- the true endpoints regardless of the current row.
stock_diff AS (
    SELECT
        dispensary_id,
        product_id,
        FIRST_VALUE(quantity_available) OVER (
            PARTITION BY dispensary_id, product_id
            ORDER BY captured_at ASC
            ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
        ) AS qty_120d_ago,
        LAST_VALUE(quantity_available) OVER (
            PARTITION BY dispensary_id, product_id
            ORDER BY captured_at ASC
            ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
        ) AS qty_current
    FROM inventory_snapshots
    WHERE captured_at >= NOW() - INTERVAL '120 days'
),
-- Collapse to one row per product (endpoints are constant per partition,
-- so DISTINCT dedupes the per-snapshot rows).
stock_diff_calc AS (
    SELECT DISTINCT
        dispensary_id,
        product_id,
        qty_current - COALESCE(qty_120d_ago, qty_current) AS stock_diff_120
    FROM stock_diff
),
-- Most recent out-of-stock event per product.
last_oos AS (
    SELECT
        dispensary_id,
        product_id,
        MAX(detected_at) AS last_oos_date
    FROM product_visibility_events
    WHERE event_type = 'oos'
    GROUP BY dispensary_id, product_id
),
-- Daily sell-through rate from the velocity view (must be refreshed first).
velocity AS (
    SELECT
        dispensary_id,
        product_id,
        avg_daily_units
    FROM mv_sku_velocity
)
SELECT
    sp.dispensary_id,
    d.name AS dispensary_name,
    d.state AS state_code,
    d.city,
    sp.provider_product_id AS sku,
    sp.name_raw AS product_name,
    sp.brand_name_raw AS brand,
    sp.category_raw AS category,
    sp.is_in_stock,
    sp.stock_status,
    sp.stock_quantity,
    sp.price_rec AS price,
    sp.first_seen_at AS first_seen,
    sp.last_seen_at AS last_seen,
    -- Net quantity change over 120 days (0 when no snapshot history).
    COALESCE(sd.stock_diff_120, 0) AS stock_diff_120,
    CASE
        WHEN lo.last_oos_date IS NOT NULL
        THEN EXTRACT(DAY FROM NOW() - lo.last_oos_date)::INT
        ELSE NULL
    END AS days_since_oos,
    -- Days until stockout = current stock / daily burn rate.
    CASE
        WHEN v.avg_daily_units > 0 AND sp.stock_quantity > 0
        THEN ROUND(sp.stock_quantity::NUMERIC / v.avg_daily_units)::INT
        ELSE NULL
    END AS days_until_stock_out,
    v.avg_daily_units,
    NOW() AS calculated_at
FROM store_products sp
JOIN dispensaries d ON d.id = sp.dispensary_id
LEFT JOIN stock_diff_calc sd
    ON sd.dispensary_id = sp.dispensary_id
   AND sd.product_id = sp.provider_product_id
LEFT JOIN last_oos lo
    ON lo.dispensary_id = sp.dispensary_id
   AND lo.product_id = sp.provider_product_id
LEFT JOIN velocity v
    ON v.dispensary_id = sp.dispensary_id
   AND v.product_id = sp.provider_product_id
WHERE d.crawl_enabled = TRUE;

-- Unique index required for concurrent refresh.
CREATE UNIQUE INDEX IF NOT EXISTS idx_mv_prod_intel_pk
    ON mv_product_intelligence(dispensary_id, sku);
CREATE INDEX IF NOT EXISTS idx_mv_prod_intel_brand
    ON mv_product_intelligence(brand);
CREATE INDEX IF NOT EXISTS idx_mv_prod_intel_state
    ON mv_product_intelligence(state_code);
CREATE INDEX IF NOT EXISTS idx_mv_prod_intel_stock_out
    ON mv_product_intelligence(days_until_stock_out ASC NULLS LAST);
CREATE INDEX IF NOT EXISTS idx_mv_prod_intel_oos
    ON mv_product_intelligence(days_since_oos DESC NULLS LAST);
|
||||||
|
|
||||||
|
|
||||||
|
-- ============================================================
-- REFRESH FUNCTION
-- ============================================================
-- Refreshes every analytics view and reports the resulting row count
-- for each, one result row per view.
--
-- Views are refreshed in dependency order:
--   1. mv_daily_sales_estimates (base)
--   2. mv_brand_market_share    (independent)
--   3. mv_sku_velocity          (depends on daily estimates)
--   4. mv_store_performance     (depends on daily estimates)
--   5. mv_category_weekly_trends (depends on daily estimates)
--   6. mv_product_intelligence  (depends on sku velocity, so last)
--
-- NOTE(review): relies on REFRESH MATERIALIZED VIEW CONCURRENTLY being
-- callable from within a function's transaction on the target Postgres
-- version — confirm against the deployment.
CREATE OR REPLACE FUNCTION refresh_sales_analytics_views()
RETURNS TABLE(view_name TEXT, rows_affected BIGINT) AS $$
DECLARE
    mv TEXT;
BEGIN
    FOREACH mv IN ARRAY ARRAY[
        'mv_daily_sales_estimates',
        'mv_brand_market_share',
        'mv_sku_velocity',
        'mv_store_performance',
        'mv_category_weekly_trends',
        'mv_product_intelligence'
    ] LOOP
        -- CONCURRENTLY avoids blocking readers; each view has the
        -- required unique index.
        EXECUTE format('REFRESH MATERIALIZED VIEW CONCURRENTLY %I', mv);
        EXECUTE format('SELECT COUNT(*) FROM %I', mv) INTO rows_affected;
        view_name := mv;
        RETURN NEXT;
    END LOOP;
END;
$$ LANGUAGE plpgsql;

COMMENT ON FUNCTION refresh_sales_analytics_views IS
'Refresh all sales analytics materialized views. Call hourly via scheduler.';
|
||||||
|
|
||||||
|
|
||||||
|
-- ============================================================
|
||||||
|
-- INITIAL REFRESH (populate views)
|
||||||
|
-- ============================================================
|
||||||
|
-- Note: Initial refresh must be non-concurrent (no unique index yet populated)
|
||||||
|
-- Run these manually after migration:
|
||||||
|
-- REFRESH MATERIALIZED VIEW mv_daily_sales_estimates;
|
||||||
|
-- REFRESH MATERIALIZED VIEW mv_brand_market_share;
|
||||||
|
-- REFRESH MATERIALIZED VIEW mv_sku_velocity;
|
||||||
|
-- REFRESH MATERIALIZED VIEW mv_store_performance;
|
||||||
|
-- REFRESH MATERIALIZED VIEW mv_category_weekly_trends;
|
||||||
359
backend/migrations/122_market_intelligence_schema.sql
Normal file
359
backend/migrations/122_market_intelligence_schema.sql
Normal file
@@ -0,0 +1,359 @@
|
|||||||
|
-- Migration 122: Market Intelligence Schema
-- Separate schema for external market data ingestion
-- Supports product, brand, and dispensary data from third-party sources

-- Dedicated schema keeps externally-sourced data apart from crawler tables.
CREATE SCHEMA IF NOT EXISTS market_intel;

-- ============================================================
-- BRANDS: Brand/Company Intelligence
-- ============================================================
CREATE TABLE IF NOT EXISTS market_intel.brands (
    id SERIAL PRIMARY KEY,

    -- Identity
    brand_name VARCHAR(255) NOT NULL,
    parent_brand VARCHAR(255),
    parent_company VARCHAR(255),
    slug VARCHAR(255),
    external_id VARCHAR(255) UNIQUE,       -- objectID from source

    -- Details
    brand_description TEXT,
    brand_logo_url TEXT,
    brand_url TEXT,
    linkedin_url TEXT,

    -- Presence
    states JSONB DEFAULT '[]',             -- Array of state names
    active_variants INTEGER DEFAULT 0,
    all_variants INTEGER DEFAULT 0,

    -- Metadata
    source VARCHAR(50) DEFAULT 'external',
    fetched_at TIMESTAMPTZ DEFAULT NOW(),
    created_at TIMESTAMPTZ DEFAULT NOW(),
    updated_at TIMESTAMPTZ DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_brands_name ON market_intel.brands(brand_name);
CREATE INDEX IF NOT EXISTS idx_brands_parent ON market_intel.brands(parent_brand);
CREATE INDEX IF NOT EXISTS idx_brands_external ON market_intel.brands(external_id);
-- GIN index supports @> containment queries on the states array.
CREATE INDEX IF NOT EXISTS idx_brands_states ON market_intel.brands USING GIN(states);
|
||||||
|
|
||||||
|
|
||||||
|
-- ============================================================
-- DISPENSARIES: Dispensary/Store Intelligence
-- ============================================================
CREATE TABLE IF NOT EXISTS market_intel.dispensaries (
    id SERIAL PRIMARY KEY,

    -- Identity
    dispensary_name VARCHAR(255) NOT NULL,
    dispensary_company_name VARCHAR(255),
    dispensary_company_id VARCHAR(255),
    slug VARCHAR(255),
    external_id VARCHAR(255) UNIQUE,       -- objectID from source

    -- Location
    street_address VARCHAR(255),
    city VARCHAR(100),
    state VARCHAR(100),
    postal_code VARCHAR(20),
    county_name VARCHAR(100),
    country_code VARCHAR(10) DEFAULT 'USA',
    full_address TEXT,
    latitude DECIMAL(10, 7),
    longitude DECIMAL(10, 7),
    timezone VARCHAR(50),
    urbanicity VARCHAR(50),                -- Urban, Suburban, Rural

    -- Contact
    phone VARCHAR(50),
    email VARCHAR(255),
    website TEXT,
    linkedin_url TEXT,

    -- License
    license_number VARCHAR(100),
    license_type VARCHAR(100),

    -- Store type / fulfillment options
    is_medical BOOLEAN DEFAULT FALSE,
    is_recreational BOOLEAN DEFAULT FALSE,
    delivery_enabled BOOLEAN DEFAULT FALSE,
    curbside_pickup BOOLEAN DEFAULT FALSE,
    instore_pickup BOOLEAN DEFAULT FALSE,
    location_type VARCHAR(50),             -- RETAIL, DELIVERY, etc.

    -- Sales estimates (supplied by the external source)
    estimated_daily_sales DECIMAL(12, 2),
    estimated_sales DECIMAL(12, 2),
    avg_daily_sales DECIMAL(12, 2),
    state_sales_bucket INTEGER,

    -- Customer demographics
    affluency JSONB DEFAULT '[]',          -- Array of affluency segments
    age_skew JSONB DEFAULT '[]',           -- Array of age brackets
    customer_segments JSONB DEFAULT '[]',  -- Array of segment names

    -- Inventory stats
    menus_count INTEGER DEFAULT 0,
    menus_count_med INTEGER DEFAULT 0,
    menus_count_rec INTEGER DEFAULT 0,
    parent_brands JSONB DEFAULT '[]',
    brand_company_names JSONB DEFAULT '[]',

    -- Business info
    banner VARCHAR(255),                   -- Chain/banner name
    business_type VARCHAR(50),             -- MSO, Independent, etc.
    pos_system VARCHAR(100),
    atm_presence BOOLEAN DEFAULT FALSE,
    tax_included BOOLEAN DEFAULT FALSE,

    -- Ratings
    rating DECIMAL(3, 2),
    reviews_count INTEGER DEFAULT 0,

    -- Status
    is_closed BOOLEAN DEFAULT FALSE,
    open_date TIMESTAMPTZ,
    last_updated_at TIMESTAMPTZ,

    -- Media
    logo_url TEXT,
    cover_url TEXT,

    -- Metadata
    source VARCHAR(50) DEFAULT 'external',
    fetched_at TIMESTAMPTZ DEFAULT NOW(),
    created_at TIMESTAMPTZ DEFAULT NOW(),
    updated_at TIMESTAMPTZ DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_dispensaries_name ON market_intel.dispensaries(dispensary_name);
CREATE INDEX IF NOT EXISTS idx_dispensaries_state ON market_intel.dispensaries(state);
CREATE INDEX IF NOT EXISTS idx_dispensaries_city ON market_intel.dispensaries(city);
CREATE INDEX IF NOT EXISTS idx_dispensaries_external ON market_intel.dispensaries(external_id);
CREATE INDEX IF NOT EXISTS idx_dispensaries_banner ON market_intel.dispensaries(banner);
CREATE INDEX IF NOT EXISTS idx_dispensaries_business_type ON market_intel.dispensaries(business_type);
CREATE INDEX IF NOT EXISTS idx_dispensaries_geo ON market_intel.dispensaries(latitude, longitude);
CREATE INDEX IF NOT EXISTS idx_dispensaries_segments ON market_intel.dispensaries USING GIN(customer_segments);
|
||||||
|
|
||||||
|
|
||||||
|
-- ============================================================
-- PRODUCTS: Product/SKU Intelligence
-- ============================================================
CREATE TABLE IF NOT EXISTS market_intel.products (
    id SERIAL PRIMARY KEY,

    -- Identity
    name VARCHAR(500) NOT NULL,
    brand VARCHAR(255),
    brand_id VARCHAR(255),
    brand_company_name VARCHAR(255),
    parent_brand VARCHAR(255),
    external_id VARCHAR(255) UNIQUE,       -- objectID from source
    cm_id VARCHAR(100),                    -- Canonical menu ID

    -- Category hierarchy
    category_0 VARCHAR(100),               -- Top level: Flower, Edibles, Vapes
    category_1 VARCHAR(255),               -- Mid level: Flower > Pre-Rolls
    category_2 VARCHAR(500),               -- Detailed: Flower > Pre-Rolls > Singles

    -- Cannabis classification
    cannabis_type VARCHAR(50),             -- SATIVA, INDICA, HYBRID
    strain VARCHAR(255),
    flavor VARCHAR(255),
    pack_size VARCHAR(100),
    description TEXT,

    -- Cannabinoids
    thc_mg DECIMAL(10, 2),
    cbd_mg DECIMAL(10, 2),
    percent_thc DECIMAL(5, 2),
    percent_cbd DECIMAL(5, 2),

    -- Dispensary context (denormalized for query performance)
    master_dispensary_name VARCHAR(255),
    master_dispensary_id VARCHAR(255),
    dispensary_count INTEGER DEFAULT 0,    -- How many stores carry this
    d_state VARCHAR(100),
    d_city VARCHAR(100),
    d_banner VARCHAR(255),
    d_business_type VARCHAR(50),
    d_medical BOOLEAN,
    d_recreational BOOLEAN,

    -- Customer demographics (copied from the dispensary record)
    d_customer_segments JSONB DEFAULT '[]',
    d_age_skew JSONB DEFAULT '[]',
    d_affluency JSONB DEFAULT '[]',
    d_urbanicity VARCHAR(50),

    -- Stock status
    in_stock BOOLEAN DEFAULT TRUE,
    last_seen_at DATE,
    last_seen_at_ts BIGINT,

    -- Media
    img_url TEXT,
    product_url TEXT,
    menu_slug VARCHAR(500),

    -- Geo
    latitude DECIMAL(10, 7),
    longitude DECIMAL(10, 7),

    -- Metadata
    source VARCHAR(50) DEFAULT 'external',
    fetched_at TIMESTAMPTZ DEFAULT NOW(),
    created_at TIMESTAMPTZ DEFAULT NOW(),
    updated_at TIMESTAMPTZ DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_products_name ON market_intel.products(name);
CREATE INDEX IF NOT EXISTS idx_products_brand ON market_intel.products(brand);
CREATE INDEX IF NOT EXISTS idx_products_external ON market_intel.products(external_id);
CREATE INDEX IF NOT EXISTS idx_products_category ON market_intel.products(category_0, category_1);
CREATE INDEX IF NOT EXISTS idx_products_cannabis_type ON market_intel.products(cannabis_type);
CREATE INDEX IF NOT EXISTS idx_products_strain ON market_intel.products(strain);
CREATE INDEX IF NOT EXISTS idx_products_state ON market_intel.products(d_state);
CREATE INDEX IF NOT EXISTS idx_products_in_stock ON market_intel.products(in_stock);
CREATE INDEX IF NOT EXISTS idx_products_dispensary_count ON market_intel.products(dispensary_count DESC);
CREATE INDEX IF NOT EXISTS idx_products_segments ON market_intel.products USING GIN(d_customer_segments);
|
||||||
|
|
||||||
|
|
||||||
|
-- ============================================================
-- PRODUCT_VARIANTS: Variant-Level Data (Pricing, Stock)
-- ============================================================
CREATE TABLE IF NOT EXISTS market_intel.product_variants (
    id SERIAL PRIMARY KEY,
    -- Variants are owned by their product; deleting the product removes them.
    product_id INTEGER REFERENCES market_intel.products(id) ON DELETE CASCADE,

    -- Identity
    variant_id VARCHAR(255) NOT NULL,
    pos_sku VARCHAR(255),
    pos_product_id VARCHAR(255),
    pos_system VARCHAR(100),

    -- Pricing
    actual_price DECIMAL(10, 2),
    original_price DECIMAL(10, 2),
    discounted_price DECIMAL(10, 2),

    -- Presentation
    product_presentation VARCHAR(255),     -- "100.00 mg", "3.5g", etc.
    quantity DECIMAL(10, 2),
    unit VARCHAR(50),                      -- mg, g, oz, each

    -- Availability
    is_medical BOOLEAN DEFAULT FALSE,
    is_recreational BOOLEAN DEFAULT FALSE,
    is_active BOOLEAN DEFAULT TRUE,

    -- Stock intelligence
    stock_status VARCHAR(50),              -- In Stock, Low Stock, Out of Stock
    stock_diff_120 DECIMAL(10, 2),         -- 120-day stock change
    days_since_oos INTEGER,
    days_until_stock_out INTEGER,

    -- Timestamps
    first_seen_at_ts BIGINT,
    first_seen_at TIMESTAMPTZ,
    last_seen_at DATE,

    -- Metadata
    fetched_at TIMESTAMPTZ DEFAULT NOW(),
    created_at TIMESTAMPTZ DEFAULT NOW(),
    updated_at TIMESTAMPTZ DEFAULT NOW(),

    -- One row per (product, variant) pair.
    UNIQUE(product_id, variant_id)
);

CREATE INDEX IF NOT EXISTS idx_variants_product ON market_intel.product_variants(product_id);
CREATE INDEX IF NOT EXISTS idx_variants_sku ON market_intel.product_variants(pos_sku);
CREATE INDEX IF NOT EXISTS idx_variants_stock_status ON market_intel.product_variants(stock_status);
CREATE INDEX IF NOT EXISTS idx_variants_price ON market_intel.product_variants(actual_price);
CREATE INDEX IF NOT EXISTS idx_variants_days_out ON market_intel.product_variants(days_until_stock_out);
|
||||||
|
|
||||||
|
|
||||||
|
-- ============================================================
-- FETCH_LOG: Track data fetches
-- ============================================================
CREATE TABLE IF NOT EXISTS market_intel.fetch_log (
    id SERIAL PRIMARY KEY,
    fetch_type VARCHAR(50) NOT NULL,       -- brands, dispensaries, products
    state_code VARCHAR(10),
    query_params JSONB,
    records_fetched INTEGER DEFAULT 0,
    records_inserted INTEGER DEFAULT 0,
    records_updated INTEGER DEFAULT 0,
    duration_ms INTEGER,
    error_message TEXT,                    -- NULL on success
    started_at TIMESTAMPTZ DEFAULT NOW(),
    completed_at TIMESTAMPTZ               -- NULL while a fetch is still running
);

CREATE INDEX IF NOT EXISTS idx_fetch_log_type ON market_intel.fetch_log(fetch_type);
CREATE INDEX IF NOT EXISTS idx_fetch_log_state ON market_intel.fetch_log(state_code);
CREATE INDEX IF NOT EXISTS idx_fetch_log_started ON market_intel.fetch_log(started_at DESC);
|
||||||
|
|
||||||
|
|
||||||
|
-- ============================================================
-- HELPER VIEWS
-- ============================================================

-- Brand market presence summary, largest active catalog first.
CREATE OR REPLACE VIEW market_intel.v_brand_presence AS
SELECT
    b.brand_name,
    b.parent_company,
    b.active_variants,
    b.all_variants,
    -- states defaults to '[]', so the length is always defined.
    jsonb_array_length(b.states) AS state_count,
    b.states,
    b.fetched_at
FROM market_intel.brands b
ORDER BY b.active_variants DESC;

-- Open dispensaries ranked by estimated daily sales within each state.
CREATE OR REPLACE VIEW market_intel.v_dispensary_rankings AS
SELECT
    d.dispensary_name,
    d.city,
    d.state,
    d.banner,
    d.business_type,
    d.estimated_daily_sales,
    d.menus_count,
    d.is_medical,
    d.is_recreational,
    d.customer_segments,
    -- Stores with unknown sales sort to the bottom of their state.
    RANK() OVER (PARTITION BY d.state ORDER BY d.estimated_daily_sales DESC NULLS LAST) AS state_rank
FROM market_intel.dispensaries d
WHERE d.is_closed = FALSE;

-- Product distribution by brand, state, and top-level category.
CREATE OR REPLACE VIEW market_intel.v_product_distribution AS
SELECT
    p.brand,
    p.d_state AS state,
    p.category_0 AS category,
    COUNT(*) AS product_count,
    COUNT(*) FILTER (WHERE p.in_stock) AS in_stock_count,
    AVG(p.dispensary_count) AS avg_store_count,
    COUNT(DISTINCT p.master_dispensary_id) AS unique_stores
FROM market_intel.products p
GROUP BY p.brand, p.d_state, p.category_0;

-- ============================================================
-- COMMENTS
-- ============================================================
COMMENT ON SCHEMA market_intel IS 'Market intelligence data from external sources';
COMMENT ON TABLE market_intel.brands IS 'Brand/company data with multi-state presence';
COMMENT ON TABLE market_intel.dispensaries IS 'Dispensary data with sales estimates and demographics';
COMMENT ON TABLE market_intel.products IS 'Product/SKU data with cannabinoid and category info';
COMMENT ON TABLE market_intel.product_variants IS 'Variant-level pricing and stock data';
COMMENT ON TABLE market_intel.fetch_log IS 'Log of data fetches for monitoring';
|
||||||
159
backend/migrations/123_extract_provider_fields.sql
Normal file
159
backend/migrations/123_extract_provider_fields.sql
Normal file
@@ -0,0 +1,159 @@
|
|||||||
|
-- Migration 123: Extract unmapped fields from provider_data
-- These fields exist in our crawl payloads but weren't being stored in columns

-- ============================================================
-- ADD NEW COLUMNS TO store_products
-- ============================================================
-- All ALTERs use IF NOT EXISTS so the migration is safe to re-run.

-- Cannabis classification (SATIVA, INDICA, HYBRID, CBD)
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS cannabis_type VARCHAR(50);

-- Canonical IDs from POS systems (used for cross-store matching)
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS canonical_strain_id VARCHAR(100);
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS canonical_vendor_id VARCHAR(100);
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS canonical_brand_id VARCHAR(100);
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS canonical_category_id VARCHAR(100);

-- Lab results (Certificate of Analysis link)
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS lab_result_url TEXT;

-- Flavors (extracted from JSONB to text array for easier querying)
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS flavors_list TEXT[];
|
||||||
|
|
||||||
|
-- ============================================================
-- BACKFILL FROM provider_data
-- ============================================================

-- Pass 1: normalize the raw provider 'classification' field.
-- Short codes (H/I/S, I/S, S/I) map to canonical labels; unrecognized
-- values are stored verbatim so no information is lost.
-- Only rows not yet classified are touched.
UPDATE store_products
SET cannabis_type = CASE
    WHEN provider_data->>'classification' IN ('HYBRID', 'H') THEN 'HYBRID'
    WHEN provider_data->>'classification' IN ('INDICA', 'I') THEN 'INDICA'
    WHEN provider_data->>'classification' IN ('SATIVA', 'S') THEN 'SATIVA'
    WHEN provider_data->>'classification' = 'I/S' THEN 'INDICA_DOMINANT'
    WHEN provider_data->>'classification' = 'S/I' THEN 'SATIVA_DOMINANT'
    WHEN provider_data->>'classification' = 'CBD' THEN 'CBD'
    ELSE provider_data->>'classification'
END
WHERE provider_data->>'classification' IS NOT NULL
  AND cannabis_type IS NULL;

-- Pass 2: derive cannabis_type from free-text strain_type for rows the
-- first pass missed.  The dominant-hybrid patterns are matched before the
-- plain indica/sativa/hybrid patterns so the more specific label wins.
UPDATE store_products
SET cannabis_type = CASE
    WHEN strain_type ILIKE '%indica%hybrid%' OR strain_type ILIKE '%hybrid%indica%' THEN 'INDICA_DOMINANT'
    WHEN strain_type ILIKE '%sativa%hybrid%' OR strain_type ILIKE '%hybrid%sativa%' THEN 'SATIVA_DOMINANT'
    WHEN strain_type ILIKE '%indica%' THEN 'INDICA'
    WHEN strain_type ILIKE '%sativa%' THEN 'SATIVA'
    WHEN strain_type ILIKE '%hybrid%' THEN 'HYBRID'
    WHEN strain_type ILIKE '%cbd%' THEN 'CBD'
    ELSE NULL
END
WHERE strain_type IS NOT NULL
  AND cannabis_type IS NULL;
|
||||||
|
|
||||||
|
-- Backfill canonical IDs from the POSMetaData sub-object.
-- NOTE(review): the guard checks only canonical_strain_id IS NULL, so a
-- row with a missing strain id but populated vendor/brand ids would be
-- re-written -- assumed acceptable for a one-shot backfill; confirm.
UPDATE store_products
SET
    canonical_strain_id = provider_data->'POSMetaData'->>'canonicalStrainId',
    canonical_vendor_id = provider_data->'POSMetaData'->>'canonicalVendorId',
    canonical_brand_id = provider_data->'POSMetaData'->>'canonicalBrandId',
    canonical_category_id = provider_data->'POSMetaData'->>'canonicalCategoryId'
WHERE provider_data->'POSMetaData' IS NOT NULL
  AND canonical_strain_id IS NULL;

-- Backfill lab result URLs (only where a URL exists and none is stored)
UPDATE store_products
SET lab_result_url = provider_data->'POSMetaData'->>'canonicalLabResultUrl'
WHERE provider_data->'POSMetaData'->>'canonicalLabResultUrl' IS NOT NULL
  AND lab_result_url IS NULL;

-- ============================================================
-- INDEXES
-- ============================================================
CREATE INDEX IF NOT EXISTS idx_store_products_cannabis_type ON store_products(cannabis_type);
CREATE INDEX IF NOT EXISTS idx_store_products_vendor_id ON store_products(canonical_vendor_id);
CREATE INDEX IF NOT EXISTS idx_store_products_strain_id ON store_products(canonical_strain_id);
|
||||||
|
|
||||||
|
-- ============================================================
-- ADD MSO FLAG TO DISPENSARIES
-- ============================================================

-- Multi-State Operator flag (calculated from chain presence in multiple states)
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS is_mso BOOLEAN DEFAULT FALSE;

-- Flag every store whose chain has locations in 2+ states.
-- The CTE excludes NULL chain_id, so the IN-list is NULL-safe.
WITH mso_chains AS (
    SELECT chain_id
    FROM dispensaries
    WHERE chain_id IS NOT NULL
    GROUP BY chain_id
    HAVING COUNT(DISTINCT state) > 1
)
UPDATE dispensaries d
SET is_mso = TRUE
WHERE d.chain_id IN (SELECT chain_id FROM mso_chains);

-- Partial index: most stores are not MSOs, so index only the TRUE rows
CREATE INDEX IF NOT EXISTS idx_dispensaries_is_mso ON dispensaries(is_mso) WHERE is_mso = TRUE;
|
||||||
|
|
||||||
|
-- ============================================================
-- PRODUCT DISTRIBUTION VIEW
-- ============================================================

-- View: How many stores carry each product (by brand + canonical name).
-- Limited to in-stock items carried by at least two stores.
CREATE OR REPLACE VIEW v_product_distribution AS
SELECT
    sp.brand_name_raw AS brand,
    sp.c_name AS product_canonical_name,
    COUNT(DISTINCT sp.dispensary_id) AS store_count,
    COUNT(DISTINCT d.state) AS state_count,
    ARRAY_AGG(DISTINCT d.state) AS states,
    AVG(sp.price_rec) AS avg_price,
    MIN(sp.price_rec) AS min_price,
    MAX(sp.price_rec) AS max_price
FROM store_products sp
JOIN dispensaries d ON d.id = sp.dispensary_id
WHERE sp.c_name IS NOT NULL
  AND sp.brand_name_raw IS NOT NULL
  AND sp.is_in_stock = TRUE
GROUP BY sp.brand_name_raw, sp.c_name
-- single-store products carry no distribution signal
HAVING COUNT(DISTINCT sp.dispensary_id) > 1
ORDER BY store_count DESC;
|
||||||
|
|
||||||
|
-- ============================================================
-- MSO SUMMARY VIEW
-- ============================================================

-- One row per multi-state chain with store/state/product rollups.
CREATE OR REPLACE VIEW v_mso_summary AS
SELECT
    c.name AS chain_name,
    COUNT(DISTINCT d.id) AS store_count,
    COUNT(DISTINCT d.state) AS state_count,
    ARRAY_AGG(DISTINCT d.state ORDER BY d.state) AS states,
    SUM(d.product_count) AS total_products,
    -- constant: the WHERE clause already restricts to multi-state chains
    TRUE AS is_mso
FROM dispensaries d
JOIN chains c ON c.id = d.chain_id
WHERE d.chain_id IN (
    -- chains present in more than one state
    SELECT chain_id
    FROM dispensaries
    WHERE chain_id IS NOT NULL
    GROUP BY chain_id
    HAVING COUNT(DISTINCT state) > 1
)
GROUP BY c.id, c.name
ORDER BY state_count DESC, store_count DESC;

-- ============================================================
-- COMMENTS
-- ============================================================
COMMENT ON COLUMN store_products.cannabis_type IS 'Normalized cannabis classification: SATIVA, INDICA, HYBRID, INDICA_DOMINANT, SATIVA_DOMINANT, CBD';
COMMENT ON COLUMN store_products.canonical_strain_id IS 'POS system strain identifier for cross-store matching';
COMMENT ON COLUMN store_products.canonical_vendor_id IS 'POS system vendor/supplier identifier';
COMMENT ON COLUMN store_products.lab_result_url IS 'Link to Certificate of Analysis / lab test results';
COMMENT ON COLUMN dispensaries.is_mso IS 'Multi-State Operator: chain operates in 2+ states';
COMMENT ON VIEW v_product_distribution IS 'Shows how many stores carry each product for distribution analysis';
COMMENT ON VIEW v_mso_summary IS 'Summary of multi-state operator chains';
|
||||||
73
backend/migrations/124_timescaledb_snapshots.sql
Normal file
73
backend/migrations/124_timescaledb_snapshots.sql
Normal file
@@ -0,0 +1,73 @@
|
|||||||
|
-- Migration 124: Convert inventory_snapshots to TimescaleDB hypertable
-- Requires: CREATE EXTENSION timescaledb; (run after installing TimescaleDB)

-- ============================================================
-- STEP 1: Enable TimescaleDB extension
-- ============================================================
CREATE EXTENSION IF NOT EXISTS timescaledb;

-- ============================================================
-- STEP 2: Convert to hypertable
-- ============================================================
-- Note: Table must have a time column and no foreign key constraints

-- First, drop any foreign keys if they exist
ALTER TABLE inventory_snapshots DROP CONSTRAINT IF EXISTS inventory_snapshots_dispensary_id_fkey;

-- Convert to hypertable, partitioned by captured_at (1 day chunks).
-- migrate_data moves existing rows into chunks; if_not_exists makes the
-- call a no-op on re-run.
SELECT create_hypertable(
    'inventory_snapshots',
    'captured_at',
    chunk_time_interval => INTERVAL '1 day',
    if_not_exists => TRUE,
    migrate_data => TRUE
);
|
||||||
|
|
||||||
|
-- ============================================================
-- STEP 3: Enable compression
-- ============================================================
-- Segment compressed chunks by dispensary_id + product_id (the common
-- query predicates) and order within segments by time, newest first.
ALTER TABLE inventory_snapshots SET (
    timescaledb.compress,
    timescaledb.compress_segmentby = 'dispensary_id, product_id',
    timescaledb.compress_orderby = 'captured_at DESC'
);

-- ============================================================
-- STEP 4: Compression policy (compress chunks older than 1 day)
-- ============================================================
-- FIX: pass if_not_exists => TRUE so re-running this migration does not
-- fail with "policy already exists" -- every other statement in this
-- file is written to be idempotent (IF NOT EXISTS / DROP IF EXISTS).
SELECT add_compression_policy('inventory_snapshots', INTERVAL '1 day', if_not_exists => TRUE);
|
||||||
|
|
||||||
|
-- ============================================================
-- STEP 5: Retention policy (optional - drop chunks older than 90 days)
-- ============================================================
-- Uncomment if you want automatic cleanup:
-- SELECT add_retention_policy('inventory_snapshots', INTERVAL '90 days');

-- ============================================================
-- STEP 6: Optimize indexes for time-series queries
-- ============================================================
-- TimescaleDB automatically creates time-based indexes;
-- add a composite index for the common (store, product, time) lookups.
CREATE INDEX IF NOT EXISTS idx_snapshots_disp_prod_time
    ON inventory_snapshots (dispensary_id, product_id, captured_at DESC);

-- ============================================================
-- VERIFICATION QUERIES (run after migration)
-- ============================================================
-- Check hypertable status:
-- SELECT * FROM timescaledb_information.hypertables WHERE hypertable_name = 'inventory_snapshots';

-- Check compression status:
-- SELECT * FROM timescaledb_information.compression_settings WHERE hypertable_name = 'inventory_snapshots';

-- Check chunk sizes:
-- SELECT chunk_name, pg_size_pretty(before_compression_total_bytes) as before,
--        pg_size_pretty(after_compression_total_bytes) as after,
--        round(100 - (after_compression_total_bytes::numeric / before_compression_total_bytes * 100), 1) as compression_pct
-- FROM chunk_compression_stats('inventory_snapshots');

-- ============================================================
-- COMMENTS
-- ============================================================
COMMENT ON TABLE inventory_snapshots IS 'TimescaleDB hypertable for inventory time-series data. Compressed after 1 day.';
|
||||||
402
backend/migrations/125_delta_only_snapshots.sql
Normal file
402
backend/migrations/125_delta_only_snapshots.sql
Normal file
@@ -0,0 +1,402 @@
|
|||||||
|
-- Migration 125: Delta-only inventory snapshots
-- Only store a row when something meaningful changes
-- Revenue calculated as: effective_price × qty_sold

-- ============================================================
-- ADD DELTA TRACKING COLUMNS
-- ============================================================

-- Previous values (to show what changed)
ALTER TABLE inventory_snapshots ADD COLUMN IF NOT EXISTS prev_quantity INTEGER;
ALTER TABLE inventory_snapshots ADD COLUMN IF NOT EXISTS prev_price_rec DECIMAL(10,2);
ALTER TABLE inventory_snapshots ADD COLUMN IF NOT EXISTS prev_price_med DECIMAL(10,2);
ALTER TABLE inventory_snapshots ADD COLUMN IF NOT EXISTS prev_status VARCHAR(50);

-- Calculated deltas
ALTER TABLE inventory_snapshots ADD COLUMN IF NOT EXISTS qty_delta INTEGER; -- negative = sold, positive = restocked
ALTER TABLE inventory_snapshots ADD COLUMN IF NOT EXISTS price_delta DECIMAL(10,2);

-- Change type flags
ALTER TABLE inventory_snapshots ADD COLUMN IF NOT EXISTS change_type VARCHAR(50); -- 'sale', 'restock', 'price_change', 'oos', 'back_in_stock'

-- ============================================================
-- INDEX FOR CHANGE TYPE QUERIES
-- ============================================================
CREATE INDEX IF NOT EXISTS idx_snapshots_change_type ON inventory_snapshots(change_type);
-- Partial index: only rows where the quantity actually moved
CREATE INDEX IF NOT EXISTS idx_snapshots_qty_delta ON inventory_snapshots(qty_delta) WHERE qty_delta != 0;
|
||||||
|
|
||||||
|
-- ============================================================
-- VIEW: Latest product state (for delta comparison)
-- ============================================================
-- DISTINCT ON with ORDER BY ... captured_at DESC keeps exactly the
-- newest snapshot per (dispensary, product) pair.
CREATE OR REPLACE VIEW v_product_latest_state AS
SELECT DISTINCT ON (dispensary_id, product_id)
    dispensary_id,
    product_id,
    quantity_available,
    price_rec,
    price_med,
    status,
    captured_at
FROM inventory_snapshots
ORDER BY dispensary_id, product_id, captured_at DESC;
|
||||||
|
|
||||||
|
-- ============================================================
-- FUNCTION: Check if product state changed
-- ============================================================
-- Compares an incoming crawl observation against the latest stored
-- snapshot and reports whether a new row should be captured, together
-- with the previous values, deltas, and a change classification.
CREATE OR REPLACE FUNCTION should_capture_snapshot(
    p_dispensary_id INTEGER,
    p_product_id TEXT,
    p_quantity INTEGER,
    p_price_rec DECIMAL,
    p_price_med DECIMAL,
    p_status VARCHAR
) RETURNS TABLE (
    should_capture BOOLEAN,
    prev_quantity INTEGER,
    prev_price_rec DECIMAL,
    prev_price_med DECIMAL,
    prev_status VARCHAR,
    qty_delta INTEGER,
    price_delta DECIMAL,
    change_type VARCHAR
) AS $$
DECLARE
    v_prev RECORD;
BEGIN
    -- Latest known state for this (dispensary, product) pair
    SELECT
        ls.quantity_available,
        ls.price_rec,
        ls.price_med,
        ls.status
    INTO v_prev
    FROM v_product_latest_state ls
    WHERE ls.dispensary_id = p_dispensary_id
      AND ls.product_id = p_product_id;

    -- First time seeing this product: always capture, no deltas available
    IF NOT FOUND THEN
        RETURN QUERY SELECT
            TRUE,
            NULL::INTEGER,
            NULL::DECIMAL,
            NULL::DECIMAL,
            NULL::VARCHAR,
            NULL::INTEGER,
            NULL::DECIMAL,
            'new_product'::VARCHAR;
        RETURN;
    END IF;

    -- Capture only when something meaningful changed.
    -- IS DISTINCT FROM treats NULLs as comparable values.
    IF v_prev.quantity_available IS DISTINCT FROM p_quantity
        OR v_prev.price_rec IS DISTINCT FROM p_price_rec
        OR v_prev.price_med IS DISTINCT FROM p_price_med
        OR v_prev.status IS DISTINCT FROM p_status THEN

        RETURN QUERY SELECT
            TRUE,
            v_prev.quantity_available,
            v_prev.price_rec,
            v_prev.price_med,
            v_prev.status,
            COALESCE(p_quantity, 0) - COALESCE(v_prev.quantity_available, 0),
            COALESCE(p_price_rec, 0) - COALESCE(v_prev.price_rec, 0),
            -- BUGFIX: test the oos / back_in_stock transitions BEFORE the
            -- generic sale / restock comparisons.  Previously a drop to 0
            -- always matched the 'sale' branch first, so 'oos' (and
            -- likewise 'back_in_stock') could never be emitted, leaving
            -- consumers such as v_days_since_oos with no rows.  A sale
            -- that empties the shelf is now labelled 'oos', matching the
            -- change_type taxonomy documented on the column.
            CASE
                WHEN p_quantity = 0 AND v_prev.quantity_available > 0 THEN 'oos'
                WHEN p_quantity > 0 AND v_prev.quantity_available = 0 THEN 'back_in_stock'
                WHEN COALESCE(p_quantity, 0) < COALESCE(v_prev.quantity_available, 0) THEN 'sale'
                WHEN COALESCE(p_quantity, 0) > COALESCE(v_prev.quantity_available, 0) THEN 'restock'
                WHEN p_price_rec IS DISTINCT FROM v_prev.price_rec THEN 'price_change'
                ELSE 'status_change'
            END;
        RETURN;
    END IF;

    -- No meaningful change: signal the caller to skip this snapshot
    RETURN QUERY SELECT
        FALSE,
        NULL::INTEGER,
        NULL::DECIMAL,
        NULL::DECIMAL,
        NULL::VARCHAR,
        NULL::INTEGER,
        NULL::DECIMAL,
        NULL::VARCHAR;
END;
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
-- ============================================================
-- REVENUE CALCULATION COLUMNS
-- ============================================================

-- Effective prices (sale price if on special, otherwise regular)
ALTER TABLE inventory_snapshots ADD COLUMN IF NOT EXISTS effective_price_rec DECIMAL(10,2);
ALTER TABLE inventory_snapshots ADD COLUMN IF NOT EXISTS effective_price_med DECIMAL(10,2);
ALTER TABLE inventory_snapshots ADD COLUMN IF NOT EXISTS is_on_special BOOLEAN DEFAULT FALSE;

-- Revenue by market type
ALTER TABLE inventory_snapshots ADD COLUMN IF NOT EXISTS revenue_rec DECIMAL(10,2);
ALTER TABLE inventory_snapshots ADD COLUMN IF NOT EXISTS revenue_med DECIMAL(10,2);

-- Time between snapshots (for velocity calc)
ALTER TABLE inventory_snapshots ADD COLUMN IF NOT EXISTS time_since_last_snapshot INTERVAL;
ALTER TABLE inventory_snapshots ADD COLUMN IF NOT EXISTS hours_since_last DECIMAL(10,2);
|
||||||
|
|
||||||
|
-- ============================================================
-- VIEW: Hourly Sales Velocity
-- ============================================================
-- Sale events bucketed by store / date / hour-of-day.
-- NOTE(review): references a revenue_estimate column that is not added
-- by the visible ALTERs in this migration -- presumably created by an
-- earlier migration; verify before deploying.
CREATE OR REPLACE VIEW v_hourly_sales AS
SELECT
    dispensary_id,
    DATE(captured_at) AS sale_date,
    EXTRACT(HOUR FROM captured_at) AS sale_hour,
    COUNT(*) FILTER (WHERE qty_delta < 0) AS transactions,
    SUM(ABS(qty_delta)) FILTER (WHERE qty_delta < 0) AS units_sold,
    SUM(revenue_estimate) FILTER (WHERE qty_delta < 0) AS revenue,
    COUNT(DISTINCT product_id) FILTER (WHERE qty_delta < 0) AS unique_products_sold
FROM inventory_snapshots
WHERE change_type = 'sale'
GROUP BY dispensary_id, DATE(captured_at), EXTRACT(HOUR FROM captured_at);

-- ============================================================
-- VIEW: Daily Sales by Store
-- ============================================================
CREATE OR REPLACE VIEW v_daily_store_sales AS
SELECT
    s.dispensary_id,
    d.name AS store_name,
    d.state,
    DATE(s.captured_at) AS sale_date,
    SUM(ABS(s.qty_delta)) AS units_sold,
    SUM(s.revenue_estimate) AS revenue,
    COUNT(*) AS sale_events,
    COUNT(DISTINCT s.product_id) AS unique_products
FROM inventory_snapshots s
JOIN dispensaries d ON d.id = s.dispensary_id
WHERE s.change_type = 'sale'
GROUP BY s.dispensary_id, d.name, d.state, DATE(s.captured_at);
|
||||||
|
|
||||||
|
-- ============================================================
-- VIEW: Daily Sales by Brand
-- ============================================================
CREATE OR REPLACE VIEW v_daily_brand_sales AS
SELECT
    s.brand_name,
    d.state,
    DATE(s.captured_at) AS sale_date,
    SUM(ABS(s.qty_delta)) AS units_sold,
    SUM(s.revenue_estimate) AS revenue,
    COUNT(DISTINCT s.dispensary_id) AS stores_with_sales,
    COUNT(DISTINCT s.product_id) AS unique_skus_sold
FROM inventory_snapshots s
JOIN dispensaries d ON d.id = s.dispensary_id
WHERE s.change_type = 'sale'
  AND s.brand_name IS NOT NULL
GROUP BY s.brand_name, d.state, DATE(s.captured_at);

-- ============================================================
-- VIEW: Product Velocity Rankings
-- ============================================================
-- 30-day velocity per (product, store).  Tiers on avg daily units:
-- hot >= 10, steady >= 3, slow >= 1, otherwise stale.
CREATE OR REPLACE VIEW v_product_velocity AS
SELECT
    s.product_id,
    s.brand_name,
    s.category,
    s.dispensary_id,
    d.name AS store_name,
    d.state,
    SUM(ABS(s.qty_delta)) AS units_sold_30d,
    SUM(s.revenue_estimate) AS revenue_30d,
    COUNT(*) AS sale_events,
    -- NULLIF guards the division when there are no distinct sale days
    ROUND(SUM(ABS(s.qty_delta))::NUMERIC / NULLIF(COUNT(DISTINCT DATE(s.captured_at)), 0), 2) AS avg_daily_units,
    ROUND(SUM(s.revenue_estimate) / NULLIF(COUNT(DISTINCT DATE(s.captured_at)), 0), 2) AS avg_daily_revenue,
    CASE
        WHEN SUM(ABS(s.qty_delta)) / NULLIF(COUNT(DISTINCT DATE(s.captured_at)), 0) >= 10 THEN 'hot'
        WHEN SUM(ABS(s.qty_delta)) / NULLIF(COUNT(DISTINCT DATE(s.captured_at)), 0) >= 3 THEN 'steady'
        WHEN SUM(ABS(s.qty_delta)) / NULLIF(COUNT(DISTINCT DATE(s.captured_at)), 0) >= 1 THEN 'slow'
        ELSE 'stale'
    END AS velocity_tier
FROM inventory_snapshots s
JOIN dispensaries d ON d.id = s.dispensary_id
WHERE s.change_type = 'sale'
  AND s.captured_at >= NOW() - INTERVAL '30 days'
GROUP BY s.product_id, s.brand_name, s.category, s.dispensary_id, d.name, d.state;
|
||||||
|
|
||||||
|
-- ============================================================
-- VIEW: Busiest Hours by Store
-- ============================================================
-- Ranks each hour of the day per store by average revenue.
CREATE OR REPLACE VIEW v_busiest_hours AS
SELECT
    dispensary_id,
    sale_hour,
    AVG(units_sold) AS avg_units_per_hour,
    AVG(revenue) AS avg_revenue_per_hour,
    SUM(units_sold) AS total_units,
    SUM(revenue) AS total_revenue,
    COUNT(*) AS days_with_data,
    RANK() OVER (PARTITION BY dispensary_id ORDER BY AVG(revenue) DESC) AS hour_rank
FROM v_hourly_sales
GROUP BY dispensary_id, sale_hour;

-- ============================================================
-- VIEW: Promotion Effectiveness (compare sale vs non-sale prices)
-- ============================================================
-- Splits units/revenue into discounted (price dropped vs previous) and
-- full-price buckets; COALESCE treats an unknown previous price as
-- full price so rows without history land in the full-price bucket.
CREATE OR REPLACE VIEW v_promotion_effectiveness AS
SELECT
    s.dispensary_id,
    d.name AS store_name,
    s.product_id,
    s.brand_name,
    DATE(s.captured_at) AS sale_date,
    SUM(ABS(s.qty_delta)) FILTER (WHERE s.price_rec < s.prev_price_rec) AS units_on_discount,
    SUM(ABS(s.qty_delta)) FILTER (WHERE s.price_rec >= COALESCE(s.prev_price_rec, s.price_rec)) AS units_full_price,
    SUM(s.revenue_estimate) FILTER (WHERE s.price_rec < s.prev_price_rec) AS revenue_discounted,
    SUM(s.revenue_estimate) FILTER (WHERE s.price_rec >= COALESCE(s.prev_price_rec, s.price_rec)) AS revenue_full_price
FROM inventory_snapshots s
JOIN dispensaries d ON d.id = s.dispensary_id
WHERE s.change_type = 'sale'
GROUP BY s.dispensary_id, d.name, s.product_id, s.brand_name, DATE(s.captured_at);
|
||||||
|
|
||||||
|
-- ============================================================
-- COMMENTS
-- ============================================================
COMMENT ON COLUMN inventory_snapshots.qty_delta IS 'Quantity change: negative=sold, positive=restocked';
COMMENT ON COLUMN inventory_snapshots.revenue_estimate IS 'Estimated revenue: ABS(qty_delta) * price_rec when qty_delta < 0';
COMMENT ON COLUMN inventory_snapshots.change_type IS 'Type of change: sale, restock, price_change, oos, back_in_stock, new_product';
COMMENT ON FUNCTION should_capture_snapshot IS 'Returns whether a snapshot should be captured and delta values';
COMMENT ON VIEW v_hourly_sales IS 'Sales aggregated by hour - find busiest times';
COMMENT ON VIEW v_daily_store_sales IS 'Daily revenue by store';
COMMENT ON VIEW v_daily_brand_sales IS 'Daily brand performance by state';
COMMENT ON VIEW v_product_velocity IS 'Product sales velocity rankings (hot/steady/slow/stale)';
COMMENT ON VIEW v_busiest_hours IS 'Rank hours by sales volume per store';
|
||||||
|
|
||||||
|
-- ============================================================
-- VIEW: Days Until Stock Out (Predictive)
-- ============================================================
-- Combines 7-day sales velocity with the latest known quantity to
-- estimate days of cover per (store, product).
CREATE OR REPLACE VIEW v_stock_out_prediction AS
WITH velocity AS (
    -- Average units sold per active day over the last 7 days
    SELECT
        dispensary_id,
        product_id,
        brand_name,
        ROUND(SUM(ABS(qty_delta))::NUMERIC / NULLIF(COUNT(DISTINCT DATE(captured_at)), 0), 2) AS daily_velocity,
        -- Hours between sales
        AVG(hours_since_last) FILTER (WHERE qty_delta < 0) AS avg_hours_between_sales
    FROM inventory_snapshots
    WHERE change_type = 'sale'
      AND captured_at >= NOW() - INTERVAL '7 days'
    GROUP BY dispensary_id, product_id, brand_name
),
current_stock AS (
    -- Newest snapshot per (store, product)
    SELECT DISTINCT ON (dispensary_id, product_id)
        dispensary_id,
        product_id,
        quantity_available AS current_qty,
        captured_at AS last_seen
    FROM inventory_snapshots
    ORDER BY dispensary_id, product_id, captured_at DESC
)
SELECT
    cs.dispensary_id,
    d.name AS store_name,
    cs.product_id,
    v.brand_name,
    cs.current_qty,
    v.daily_velocity,
    CASE
        WHEN v.daily_velocity > 0 THEN ROUND(cs.current_qty / v.daily_velocity, 1)
        ELSE NULL
    END AS days_until_stock_out,
    CASE
        WHEN v.daily_velocity > 0 AND cs.current_qty / v.daily_velocity <= 3 THEN 'critical'
        WHEN v.daily_velocity > 0 AND cs.current_qty / v.daily_velocity <= 7 THEN 'low'
        WHEN v.daily_velocity > 0 AND cs.current_qty / v.daily_velocity <= 14 THEN 'moderate'
        ELSE 'healthy'
    END AS stock_health,
    cs.last_seen
FROM current_stock cs
JOIN velocity v ON v.dispensary_id = cs.dispensary_id AND v.product_id = cs.product_id
JOIN dispensaries d ON d.id = cs.dispensary_id
WHERE cs.current_qty > 0
  AND v.daily_velocity > 0;
|
||||||
|
|
||||||
|
-- ============================================================
-- VIEW: Days Since OOS (for products currently out of stock)
-- ============================================================
-- An 'oos' event counts as current until a later 'back_in_stock'
-- event exists for the same (store, product).
CREATE OR REPLACE VIEW v_days_since_oos AS
SELECT
    s.dispensary_id,
    d.name AS store_name,
    s.product_id,
    s.brand_name,
    s.captured_at AS went_oos_at,
    -- 86400 = seconds per day
    EXTRACT(EPOCH FROM (NOW() - s.captured_at)) / 86400 AS days_since_oos,
    s.prev_quantity AS last_known_qty
FROM inventory_snapshots s
JOIN dispensaries d ON d.id = s.dispensary_id
WHERE s.change_type = 'oos'
  AND NOT EXISTS (
      -- No back_in_stock event after this OOS
      SELECT 1 FROM inventory_snapshots s2
      WHERE s2.dispensary_id = s.dispensary_id
        AND s2.product_id = s.product_id
        AND s2.change_type = 'back_in_stock'
        AND s2.captured_at > s.captured_at
  );
|
||||||
|
|
||||||
|
-- ============================================================
-- VIEW: Brand Variant Counts (track brand growth)
-- ============================================================
-- Active vs inactive SKU counts per brand per state.
CREATE OR REPLACE VIEW v_brand_variants AS
SELECT
    sp.brand_name_raw AS brand_name,
    d.state,
    COUNT(DISTINCT sp.id) AS total_variants,
    COUNT(DISTINCT sp.id) FILTER (WHERE sp.is_in_stock = TRUE) AS active_variants,
    COUNT(DISTINCT sp.id) FILTER (WHERE sp.is_in_stock = FALSE) AS inactive_variants,
    COUNT(DISTINCT sp.dispensary_id) AS stores_carrying,
    COUNT(DISTINCT sp.category_raw) AS categories,
    MIN(sp.first_seen_at) AS brand_first_seen,
    MAX(sp.last_seen_at) AS brand_last_seen
FROM store_products sp
JOIN dispensaries d ON d.id = sp.dispensary_id
WHERE sp.brand_name_raw IS NOT NULL
GROUP BY sp.brand_name_raw, d.state;

-- ============================================================
-- VIEW: Brand Growth (compare variant counts over time)
-- ============================================================
-- Week-over-week variant counts per brand (90-day window); the LEFT
-- self-join pairs each week with the immediately preceding one, so a
-- brand's first observed week reports as 'new'.
CREATE OR REPLACE VIEW v_brand_growth AS
WITH weekly_counts AS (
    SELECT
        brand_name_raw AS brand_name,
        DATE_TRUNC('week', last_seen_at) AS week,
        COUNT(DISTINCT id) AS variant_count
    FROM store_products
    WHERE brand_name_raw IS NOT NULL
      AND last_seen_at >= NOW() - INTERVAL '90 days'
    GROUP BY brand_name_raw, DATE_TRUNC('week', last_seen_at)
)
SELECT
    w1.brand_name,
    w1.week AS current_week,
    w1.variant_count AS current_variants,
    w2.variant_count AS prev_week_variants,
    w1.variant_count - COALESCE(w2.variant_count, 0) AS variant_change,
    CASE
        WHEN w2.variant_count IS NULL THEN 'new'
        WHEN w1.variant_count > w2.variant_count THEN 'growing'
        WHEN w1.variant_count < w2.variant_count THEN 'declining'
        ELSE 'stable'
    END AS growth_status
FROM weekly_counts w1
LEFT JOIN weekly_counts w2
    ON w2.brand_name = w1.brand_name
    AND w2.week = w1.week - INTERVAL '1 week'
ORDER BY w1.brand_name, w1.week DESC;

COMMENT ON VIEW v_stock_out_prediction IS 'Predict days until stock out based on velocity';
COMMENT ON VIEW v_days_since_oos IS 'Products currently OOS and how long they have been out';
COMMENT ON VIEW v_brand_variants IS 'Active vs inactive SKU counts per brand per state';
COMMENT ON VIEW v_brand_growth IS 'Week-over-week brand variant growth tracking';
|
||||||
53
backend/migrations/126_az_high_frequency.sql
Normal file
53
backend/migrations/126_az_high_frequency.sql
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
-- Migration 126: Set AZ stores to 5-minute high-frequency crawls
-- Other states default to 60-minute (1 hour) intervals

-- ============================================================
-- SET AZ STORES TO 5-MINUTE INTERVALS
-- ============================================================
-- Base interval: flat 5 minutes. RANDOM() staggers only the FIRST crawl
-- across a 0-5 minute window so all AZ stores don't fire simultaneously;
-- there is no per-crawl jitter applied here (the old "+/- 3 min jitter"
-- comment described behavior this statement never implemented).
UPDATE dispensaries
SET
    crawl_interval_minutes = 5,
    next_crawl_at = NOW() + (RANDOM() * INTERVAL '5 minutes')
WHERE state = 'AZ'
  AND crawl_enabled = TRUE;

-- ============================================================
-- SET OTHER STATES TO 60-MINUTE INTERVALS
-- ============================================================
-- IS DISTINCT FROM instead of != so dispensaries with a NULL state still
-- receive the default interval (NULL != 'AZ' evaluates to NULL and would
-- silently skip those rows).
-- Only touches rows with no interval yet, so manual overrides survive.
UPDATE dispensaries
SET
    crawl_interval_minutes = 60,
    next_crawl_at = NOW() + (RANDOM() * INTERVAL '60 minutes') -- Stagger initial crawls
WHERE state IS DISTINCT FROM 'AZ'
  AND crawl_enabled = TRUE
  AND crawl_interval_minutes IS NULL;
|
||||||
|
|
||||||
|
-- ============================================================
|
||||||
|
-- VERIFY RESULTS
|
||||||
|
-- ============================================================
|
||||||
|
-- SELECT state, crawl_interval_minutes, COUNT(*)
|
||||||
|
-- FROM dispensaries
|
||||||
|
-- WHERE crawl_enabled = TRUE
|
||||||
|
-- GROUP BY state, crawl_interval_minutes
|
||||||
|
-- ORDER BY state;
|
||||||
|
|
||||||
|
-- ============================================================
-- CREATE VIEW FOR MONITORING CRAWL LOAD
-- ============================================================
-- Capacity-planning view: for each (state, interval) bucket, how many
-- crawls per hour that implies and roughly how many workers are needed.
CREATE OR REPLACE VIEW v_crawl_load AS
SELECT
    state,
    crawl_interval_minutes,
    COUNT(*) as store_count,
    -- Crawls per hour = stores * (60 / interval); a NULL interval is
    -- treated as the 60-minute default.
    ROUND(COUNT(*) * (60.0 / COALESCE(crawl_interval_minutes, 60))) as crawls_per_hour,
    -- Assuming 30 sec per crawl, one worker handles 120 crawls/hour,
    -- so workers needed = crawls_per_hour / 120 (rounded to one decimal).
    ROUND(COUNT(*) * (60.0 / COALESCE(crawl_interval_minutes, 60)) / 120, 1) as workers_needed
FROM dispensaries
WHERE crawl_enabled = TRUE
GROUP BY state, crawl_interval_minutes
ORDER BY crawls_per_hour DESC;

COMMENT ON VIEW v_crawl_load IS 'Monitor crawl load by state and interval';
|
||||||
164
backend/migrations/127_fix_worker_task_limit.sql
Normal file
164
backend/migrations/127_fix_worker_task_limit.sql
Normal file
@@ -0,0 +1,164 @@
|
|||||||
|
-- Migration 127: Fix worker task concurrency limit
-- Problem: claim_task function checks session_task_count but never increments it
-- Solution: Increment on claim, decrement on complete/fail/release

-- =============================================================================
-- STEP 1: Set max tasks to 5 for all workers
-- =============================================================================
-- Deliberately unfiltered UPDATE: every registered worker gets the new cap.
UPDATE worker_registry SET session_max_tasks = 5;

-- Set default to 5 for new workers
ALTER TABLE worker_registry ALTER COLUMN session_max_tasks SET DEFAULT 5;

-- =============================================================================
-- STEP 2: Reset all session_task_count to match actual active tasks
-- =============================================================================
-- Correlated subquery (rather than an UPDATE ... FROM join) so workers with
-- ZERO active tasks are also updated — to 0 — instead of being skipped.
UPDATE worker_registry wr SET session_task_count = (
    SELECT COUNT(*) FROM worker_tasks wt
    WHERE wt.worker_id = wr.worker_id
    AND wt.status IN ('claimed', 'running')
);
|
||||||
|
|
||||||
|
-- =============================================================================
-- STEP 3: Update claim_task function to increment session_task_count
-- =============================================================================
-- Atomically claims the highest-priority pending task matching the worker's
-- role, geo session state, and preflight capabilities, then bumps the
-- worker's session_task_count. Returns NULL when nothing is claimable.
CREATE OR REPLACE FUNCTION claim_task(
    p_role VARCHAR(50),
    p_worker_id VARCHAR(100),
    p_curl_passed BOOLEAN DEFAULT TRUE,
    p_http_passed BOOLEAN DEFAULT FALSE
) RETURNS worker_tasks AS $$
DECLARE
    claimed_task worker_tasks;
    worker_state VARCHAR(2);
    session_valid BOOLEAN;
    session_tasks INT;
    max_tasks INT;
BEGIN
    -- Get worker's current geo session info
    SELECT
        current_state,
        session_task_count,
        session_max_tasks,
        (geo_session_started_at IS NOT NULL AND geo_session_started_at > NOW() - INTERVAL '60 minutes')
    INTO worker_state, session_tasks, max_tasks, session_valid
    FROM worker_registry
    WHERE worker_id = p_worker_id;

    -- Check if worker has reached max concurrent tasks (default 5).
    -- NOTE(review): this read-then-increment is not atomic; two concurrent
    -- claims by the same worker can briefly exceed the cap — confirm whether
    -- that is acceptable or needs a row lock on worker_registry.
    IF session_tasks >= COALESCE(max_tasks, 5) THEN
        RETURN NULL;
    END IF;

    -- If no valid geo session, or session expired, worker can't claim tasks.
    -- Worker must re-qualify first.
    IF worker_state IS NULL OR NOT session_valid THEN
        RETURN NULL;
    END IF;

    -- Claim task matching worker's state
    UPDATE worker_tasks
    SET
        status = 'claimed',
        worker_id = p_worker_id,
        claimed_at = NOW(),
        updated_at = NOW()
    WHERE id = (
        SELECT wt.id FROM worker_tasks wt
        JOIN dispensaries d ON wt.dispensary_id = d.id
        WHERE wt.role = p_role
          AND wt.status = 'pending'
          AND (wt.scheduled_for IS NULL OR wt.scheduled_for <= NOW())
          -- GEO FILTER: Task's dispensary must match worker's state
          AND d.state = worker_state
          -- Method compatibility: worker must have passed the required preflight
          AND (
              wt.method IS NULL -- No preference, any worker can claim
              OR (wt.method = 'curl' AND p_curl_passed = TRUE)
              OR (wt.method = 'http' AND p_http_passed = TRUE)
          )
          -- Exclude stores that already have an active task.
          -- BUGFIX: the previous NOT IN predicate excluded the candidate's own
          -- dispensary_id from the subquery (dispensary_id != wt.dispensary_id),
          -- which made "wt.dispensary_id NOT IN (...)" always true — the filter
          -- was a no-op. NOT EXISTS checks the intended condition and is also
          -- NULL-safe for tasks without a dispensary.
          AND NOT EXISTS (
              SELECT 1 FROM worker_tasks active
              WHERE active.dispensary_id = wt.dispensary_id
                AND active.status IN ('claimed', 'running')
          )
        ORDER BY wt.priority DESC, wt.created_at ASC
        LIMIT 1
        FOR UPDATE SKIP LOCKED
    )
    RETURNING * INTO claimed_task;

    -- INCREMENT session_task_count if we claimed a task
    IF claimed_task.id IS NOT NULL THEN
        UPDATE worker_registry
        SET session_task_count = session_task_count + 1
        WHERE worker_id = p_worker_id;
    END IF;

    RETURN claimed_task;
END;
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
-- =============================================================================
-- STEP 4: Create trigger to decrement on task completion/failure/release
-- =============================================================================
-- When a task that a worker was actively holding (claimed/running) moves to
-- a terminal state or is released back to the pool, return one slot to that
-- worker. GREATEST(0, ...) guards the counter against underflow.
CREATE OR REPLACE FUNCTION decrement_worker_task_count()
RETURNS TRIGGER AS $$
BEGIN
    IF OLD.worker_id IS NOT NULL
       AND OLD.status IN ('claimed', 'running')
       AND (NEW.status IN ('pending', 'completed', 'failed') OR NEW.worker_id IS NULL) THEN
        UPDATE worker_registry
        SET session_task_count = GREATEST(0, session_task_count - 1)
        WHERE worker_id = OLD.worker_id;
    END IF;

    RETURN NEW;
END;
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
-- Drop existing trigger if any (keeps re-runs of this migration idempotent)
DROP TRIGGER IF EXISTS trg_decrement_worker_task_count ON worker_tasks;

-- Create trigger on UPDATE (status change or worker_id cleared);
-- the function itself decides whether a decrement applies.
CREATE TRIGGER trg_decrement_worker_task_count
AFTER UPDATE ON worker_tasks
FOR EACH ROW
EXECUTE FUNCTION decrement_worker_task_count();
|
||||||
|
|
||||||
|
-- Also handle DELETE (completed tasks are deleted from pool).
-- Mirrors the UPDATE trigger: deleting a row a worker was actively holding
-- frees that worker's slot.
CREATE OR REPLACE FUNCTION decrement_worker_task_count_delete()
RETURNS TRIGGER AS $$
BEGIN
    -- Guard clause: nothing to do unless the deleted row was actively held.
    -- IS NOT TRUE is the exact logical inverse of the positive test,
    -- including the NULL case.
    IF (OLD.worker_id IS NOT NULL AND OLD.status IN ('claimed', 'running')) IS NOT TRUE THEN
        RETURN OLD;
    END IF;

    UPDATE worker_registry
    SET session_task_count = GREATEST(0, session_task_count - 1)
    WHERE worker_id = OLD.worker_id;
    RETURN OLD;
END;
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
-- Remove any previous version of the DELETE trigger before recreating it.
DROP TRIGGER IF EXISTS trg_decrement_worker_task_count_delete ON worker_tasks;

-- Fires on row deletion so slots held by deleted claimed/running tasks
-- are returned to the worker.
CREATE TRIGGER trg_decrement_worker_task_count_delete
AFTER DELETE ON worker_tasks
FOR EACH ROW
EXECUTE FUNCTION decrement_worker_task_count_delete();
|
||||||
|
|
||||||
|
-- =============================================================================
-- STEP 5: Verify current state
-- =============================================================================
-- Side-by-side check of the stored counter vs the true number of active
-- tasks per worker. LEFT JOIN keeps workers with zero active tasks
-- (COUNT(wt.id) yields 0 for them, same as the correlated count).
SELECT
    wr.worker_id,
    wr.friendly_name,
    wr.session_task_count,
    wr.session_max_tasks,
    COUNT(wt.id) AS actual_count
FROM worker_registry wr
LEFT JOIN worker_tasks wt
    ON wt.worker_id = wr.worker_id
   AND wt.status IN ('claimed', 'running')
WHERE wr.status = 'active'
GROUP BY wr.worker_id, wr.friendly_name, wr.session_task_count, wr.session_max_tasks
ORDER BY wr.friendly_name;
|
||||||
109
backend/migrations/128_pool_config.sql
Normal file
109
backend/migrations/128_pool_config.sql
Normal file
@@ -0,0 +1,109 @@
|
|||||||
|
-- Migration 128: Pool configuration table
-- Controls whether workers can claim tasks from the pool

CREATE TABLE IF NOT EXISTS pool_config (
    id SERIAL PRIMARY KEY,
    pool_open BOOLEAN NOT NULL DEFAULT true,
    closed_reason TEXT,
    closed_at TIMESTAMPTZ,
    closed_by VARCHAR(100),
    opened_at TIMESTAMPTZ DEFAULT NOW(),
    created_at TIMESTAMPTZ DEFAULT NOW(),
    updated_at TIMESTAMPTZ DEFAULT NOW()
);

-- Insert default config (pool open).
-- BUGFIX: the original used ON CONFLICT DO NOTHING, but the table's only
-- constraint is the SERIAL primary key, so no conflict could ever occur and
-- every re-run of this migration inserted another row. Guard with
-- NOT EXISTS so the seed row is written exactly once.
INSERT INTO pool_config (pool_open, opened_at)
SELECT true, NOW()
WHERE NOT EXISTS (SELECT 1 FROM pool_config);
|
||||||
|
|
||||||
|
-- Update claim_task function to check pool status.
-- Same claim logic as the migration-127 version, plus a global "pool open"
-- gate read from pool_config before any per-worker checks.
CREATE OR REPLACE FUNCTION claim_task(
    p_role VARCHAR(50),
    p_worker_id VARCHAR(100),
    p_curl_passed BOOLEAN DEFAULT TRUE,
    p_http_passed BOOLEAN DEFAULT FALSE
) RETURNS worker_tasks AS $$
DECLARE
    claimed_task worker_tasks;
    worker_state VARCHAR(2);
    session_valid BOOLEAN;
    session_tasks INT;
    max_tasks INT;
    is_pool_open BOOLEAN;
BEGIN
    -- Check if pool is open. FIX: ORDER BY id makes the read deterministic
    -- if pool_config ever holds more than one row (nothing enforces a single
    -- row); a missing row defaults to "open".
    SELECT pool_open INTO is_pool_open FROM pool_config ORDER BY id LIMIT 1;
    IF NOT COALESCE(is_pool_open, true) THEN
        RETURN NULL; -- Pool is closed, no claiming allowed
    END IF;

    -- Get worker's current geo session info
    SELECT
        current_state,
        session_task_count,
        session_max_tasks,
        (geo_session_started_at IS NOT NULL AND geo_session_started_at > NOW() - INTERVAL '60 minutes')
    INTO worker_state, session_tasks, max_tasks, session_valid
    FROM worker_registry
    WHERE worker_id = p_worker_id;

    -- Check if worker has reached max concurrent tasks (default 5).
    -- NOTE(review): read-then-increment is not atomic; concurrent claims by
    -- the same worker can briefly exceed the cap.
    IF session_tasks >= COALESCE(max_tasks, 5) THEN
        RETURN NULL;
    END IF;

    -- If no valid geo session, or session expired, worker can't claim tasks.
    -- Worker must re-qualify first.
    IF worker_state IS NULL OR NOT session_valid THEN
        RETURN NULL;
    END IF;

    -- Claim task matching worker's state
    UPDATE worker_tasks
    SET
        status = 'claimed',
        worker_id = p_worker_id,
        claimed_at = NOW(),
        updated_at = NOW()
    WHERE id = (
        SELECT wt.id FROM worker_tasks wt
        JOIN dispensaries d ON wt.dispensary_id = d.id
        WHERE wt.role = p_role
          AND wt.status = 'pending'
          AND (wt.scheduled_for IS NULL OR wt.scheduled_for <= NOW())
          -- GEO FILTER: Task's dispensary must match worker's state
          AND d.state = worker_state
          -- Method compatibility: worker must have passed the required preflight
          AND (
              wt.method IS NULL -- No preference, any worker can claim
              OR (wt.method = 'curl' AND p_curl_passed = TRUE)
              OR (wt.method = 'http' AND p_http_passed = TRUE)
          )
          -- Exclude stores that already have an active task.
          -- BUGFIX: the previous NOT IN predicate excluded the candidate's own
          -- dispensary_id from the subquery, making the check always true
          -- (a no-op). NOT EXISTS implements the stated intent and is
          -- NULL-safe for tasks without a dispensary.
          AND NOT EXISTS (
              SELECT 1 FROM worker_tasks active
              WHERE active.dispensary_id = wt.dispensary_id
                AND active.status IN ('claimed', 'running')
          )
        ORDER BY wt.priority DESC, wt.created_at ASC
        LIMIT 1
        FOR UPDATE SKIP LOCKED
    )
    RETURNING * INTO claimed_task;

    -- INCREMENT session_task_count if we claimed a task
    IF claimed_task.id IS NOT NULL THEN
        UPDATE worker_registry
        SET session_task_count = session_task_count + 1
        WHERE worker_id = p_worker_id;
    END IF;

    RETURN claimed_task;
END;
$$ LANGUAGE plpgsql;

-- Verify
SELECT 'pool_config table created' as status;
SELECT * FROM pool_config;
|
||||||
60
backend/migrations/129_claim_tasks_for_geo.sql
Normal file
60
backend/migrations/129_claim_tasks_for_geo.sql
Normal file
@@ -0,0 +1,60 @@
|
|||||||
|
-- Migration 129: Claim tasks for specific geo
-- Used after worker gets IP to claim more tasks for same geo

-- Function: Claim up to N tasks for a SPECIFIC geo (state/city)
-- Different from claim_tasks_batch which picks the geo with most tasks
--
-- BUGFIX: p_state_code originally followed p_max_tasks (which has a default)
-- but had no default of its own. PostgreSQL rejects that ("input parameters
-- after one with a default value must also have defaults"), so the CREATE
-- FUNCTION failed outright. DEFAULT NULL keeps the parameter order and all
-- existing call sites working; a NULL state simply matches (and claims)
-- nothing.
CREATE OR REPLACE FUNCTION claim_tasks_batch_for_geo(
    p_worker_id VARCHAR(255),
    p_max_tasks INTEGER DEFAULT 4,
    p_state_code VARCHAR(2) DEFAULT NULL,
    p_city VARCHAR(100) DEFAULT NULL,
    p_role VARCHAR(50) DEFAULT NULL
) RETURNS TABLE (
    task_id INTEGER,
    role VARCHAR(50),
    dispensary_id INTEGER,
    dispensary_name VARCHAR(255),
    city VARCHAR(100),
    state_code VARCHAR(2),
    platform VARCHAR(50),
    method VARCHAR(20)
) AS $$
BEGIN
    -- Claim up to p_max_tasks for the specified geo.
    -- FOR UPDATE SKIP LOCKED lets concurrent callers claim disjoint sets
    -- without blocking each other.
    RETURN QUERY
    WITH claimed AS (
        UPDATE worker_tasks t SET
            status = 'claimed',
            worker_id = p_worker_id,
            claimed_at = NOW(),
            updated_at = NOW() -- consistency with claim_task(), which stamps updates
        FROM (
            SELECT t2.id
            FROM worker_tasks t2
            JOIN dispensaries d ON t2.dispensary_id = d.id
            WHERE t2.status = 'pending'
              AND d.state = p_state_code
              AND (p_city IS NULL OR d.city = p_city)
              AND (p_role IS NULL OR t2.role = p_role)
            ORDER BY t2.priority DESC, t2.created_at ASC
            FOR UPDATE SKIP LOCKED
            LIMIT p_max_tasks
        ) sub
        WHERE t.id = sub.id
        RETURNING t.id, t.role, t.dispensary_id, t.method
    )
    SELECT
        c.id as task_id,
        c.role,
        c.dispensary_id,
        d.name as dispensary_name,
        d.city,
        d.state as state_code,
        d.platform,
        c.method
    FROM claimed c
    JOIN dispensaries d ON c.dispensary_id = d.id;
    -- NOTE(review): unlike claim_task(), this does not increment
    -- worker_registry.session_task_count — confirm whether batch claims
    -- should count against the per-worker concurrency cap.
END;
$$ LANGUAGE plpgsql;

-- Verify
SELECT 'claim_tasks_batch_for_geo function created' as status;
|
||||||
49
backend/migrations/130_hoodie_comparison_reports.sql
Normal file
49
backend/migrations/130_hoodie_comparison_reports.sql
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
-- Hoodie Comparison Reports
-- Stores delta results from comparing Hoodie data against CannaIQ
-- Raw Hoodie data stays remote (proxy only) - we only store comparison results

CREATE TABLE IF NOT EXISTS hoodie_comparison_reports (
    id SERIAL PRIMARY KEY,
    report_type VARCHAR(50) NOT NULL, -- 'dispensaries', 'brands', 'products'
    state VARCHAR(50) NOT NULL,

    -- Counts
    hoodie_total INT NOT NULL DEFAULT 0,
    cannaiq_total INT NOT NULL DEFAULT 0,
    in_both INT NOT NULL DEFAULT 0,
    hoodie_only INT NOT NULL DEFAULT 0,
    cannaiq_only INT NOT NULL DEFAULT 0,

    -- Delta details (JSONB for flexibility)
    hoodie_only_items JSONB DEFAULT '[]', -- Items in Hoodie but not CannaIQ
    cannaiq_only_items JSONB DEFAULT '[]', -- Items in CannaIQ but not Hoodie
    matched_items JSONB DEFAULT '[]', -- Items in both (with any differences)

    -- Metadata
    created_at TIMESTAMPTZ DEFAULT NOW(),
    duration_ms INT, -- How long the comparison took
    error TEXT -- Any errors during comparison
);

-- Index for querying latest reports.
-- FIX: IF NOT EXISTS added so re-running the migration is idempotent,
-- matching CREATE TABLE IF NOT EXISTS above (a bare CREATE INDEX errors
-- out on the second run).
CREATE INDEX IF NOT EXISTS idx_hoodie_reports_type_state ON hoodie_comparison_reports(report_type, state, created_at DESC);
CREATE INDEX IF NOT EXISTS idx_hoodie_reports_created ON hoodie_comparison_reports(created_at DESC);

-- View for latest report per type/state.
-- DISTINCT ON with ORDER BY ... created_at DESC keeps only the newest
-- successful (error IS NULL) report for each (report_type, state) pair.
CREATE OR REPLACE VIEW v_hoodie_latest_reports AS
SELECT DISTINCT ON (report_type, state)
    id,
    report_type,
    state,
    hoodie_total,
    cannaiq_total,
    in_both,
    hoodie_only,
    cannaiq_only,
    created_at,
    duration_ms
FROM hoodie_comparison_reports
WHERE error IS NULL
ORDER BY report_type, state, created_at DESC;

COMMENT ON TABLE hoodie_comparison_reports IS 'Stores comparison results between Hoodie and CannaIQ data. Raw Hoodie data stays remote.';
|
||||||
53
backend/migrations/130_worker_badge.sql
Normal file
53
backend/migrations/130_worker_badge.sql
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
-- Migration 130: Worker qualification badge
-- Session-scoped badge showing worker qualification status

-- Add badge column to worker_registry
ALTER TABLE worker_registry
ADD COLUMN IF NOT EXISTS badge VARCHAR(20) DEFAULT NULL;

-- Add qualified_at timestamp
ALTER TABLE worker_registry
ADD COLUMN IF NOT EXISTS qualified_at TIMESTAMPTZ DEFAULT NULL;

-- Add current_session_id to link worker to their active session
ALTER TABLE worker_registry
ADD COLUMN IF NOT EXISTS current_session_id INTEGER DEFAULT NULL;

-- Badge values:
-- 'gold' = preflight passed, actively qualified with valid session
-- NULL = not qualified (no active session or session expired)

-- Function: Set worker badge to gold when qualified.
-- Returns TRUE when a worker_registry row was updated, FALSE for an
-- unknown worker_id (FOUND reflects the UPDATE's row count).
CREATE OR REPLACE FUNCTION set_worker_qualified(
    p_worker_id VARCHAR(255),
    p_session_id INTEGER
) RETURNS BOOLEAN AS $$
BEGIN
    UPDATE worker_registry
    SET badge = 'gold',
        qualified_at = NOW(),
        current_session_id = p_session_id
    WHERE worker_id = p_worker_id;
    RETURN FOUND;
END;
$$ LANGUAGE plpgsql;

-- Function: Clear worker badge when session ends.
-- Inverse of set_worker_qualified(); returns TRUE when a row was updated.
CREATE OR REPLACE FUNCTION clear_worker_badge(p_worker_id VARCHAR(255))
RETURNS BOOLEAN AS $$
BEGIN
    UPDATE worker_registry
    SET badge = NULL,
        qualified_at = NULL,
        current_session_id = NULL
    WHERE worker_id = p_worker_id;
    RETURN FOUND;
END;
$$ LANGUAGE plpgsql;

-- Index for finding qualified workers.
-- Partial index: only rows with a non-NULL badge are indexed, keeping it small.
CREATE INDEX IF NOT EXISTS idx_worker_registry_badge
ON worker_registry(badge) WHERE badge IS NOT NULL;

-- Verify
SELECT 'worker_registry badge column added' as status;
|
||||||
21
backend/migrations/131_normalize_brand.sql
Normal file
21
backend/migrations/131_normalize_brand.sql
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
-- Migration: 131_normalize_brand
-- Purpose: Add normalize_brand() function for fuzzy brand matching across dispensaries
-- Used by Cannabrands integration to match brand names regardless of spelling variations

-- Function to normalize brand names for matching
-- "Aloha TymeMachine" → "alohatymemachine"
-- "ALOHA TYME MACHINE" → "alohatymemachine"
-- "Aloha Tyme Machine" → "alohatymemachine"
-- Strips every non-alphanumeric character and lowercases; NULL input is
-- treated as the empty string. IMMUTABLE + PARALLEL SAFE make it legal to
-- use in the expression indexes below.
CREATE OR REPLACE FUNCTION normalize_brand(name TEXT)
RETURNS TEXT AS $$
    SELECT LOWER(REGEXP_REPLACE(COALESCE(name, ''), '[^a-zA-Z0-9]', '', 'g'))
$$ LANGUAGE SQL IMMUTABLE PARALLEL SAFE;

-- Create functional index for efficient lookups
-- This allows queries like: WHERE normalize_brand(brand_name_raw) = 'alohatymemachine'
-- NOTE(review): CREATE INDEX CONCURRENTLY cannot run inside a transaction
-- block — confirm the migration runner executes this file in autocommit
-- mode, otherwise these two statements will fail.
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_store_products_brand_normalized
ON store_products (normalize_brand(brand_name_raw));

-- Also index on snapshots table for historical queries
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_store_product_snapshots_brand_normalized
ON store_product_snapshots (normalize_brand(brand_name_raw));
|
||||||
2294
backend/node_modules/.package-lock.json
generated
vendored
2294
backend/node_modules/.package-lock.json
generated
vendored
File diff suppressed because it is too large
Load Diff
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user