diff --git a/backend/Dockerfile b/backend/Dockerfile index 84592ca0..8e9a8d3e 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -24,7 +24,6 @@ RUN apt-get update && apt-get install -y \ --no-install-recommends \ && rm -rf /var/lib/apt/lists/* -# Tell Puppeteer to use system Chromium ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true ENV PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium @@ -35,6 +34,9 @@ RUN npm ci --only=production COPY --from=builder /app/dist ./dist +# Create local images directory for when MinIO is not configured +RUN mkdir -p /app/public/images/products + EXPOSE 3010 CMD ["node", "dist/index.js"] diff --git a/backend/package-lock.json b/backend/package-lock.json index 18139db6..7a703c09 100644 --- a/backend/package-lock.json +++ b/backend/package-lock.json @@ -26,7 +26,7 @@ "puppeteer": "^21.0.0", "puppeteer-extra": "^3.3.6", "puppeteer-extra-plugin-stealth": "^2.11.2", - "sharp": "^0.34.5", + "sharp": "^0.32.0", "socks-proxy-agent": "^8.0.2", "uuid": "^9.0.1", "zod": "^3.22.4" @@ -38,6 +38,7 @@ "@types/jsonwebtoken": "^9.0.5", "@types/node": "^20.10.5", "@types/node-cron": "^3.0.11", + "@types/pg": "^8.15.6", "@types/uuid": "^9.0.7", "tsx": "^4.7.0", "typescript": "^5.3.3" @@ -64,15 +65,6 @@ "node": ">=6.9.0" } }, - "node_modules/@emnapi/runtime": { - "version": "1.7.1", - "resolved": "https://registry.npmjs.org/@emnapi/runtime/-/runtime-1.7.1.tgz", - "integrity": "sha512-PVtJr5CmLwYAU9PZDMITZoR5iAOShYREoR45EyyLrbntV50mdePTgUn4AmOw90Ifcj+x2kRjdzr1HP3RrNiHGA==", - "optional": true, - "dependencies": { - "tslib": "^2.4.0" - } - }, "node_modules/@esbuild/aix-ppc64": { "version": "0.25.12", "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.25.12.tgz", @@ -489,446 +481,6 @@ "node": ">=18" } }, - "node_modules/@img/colour": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/@img/colour/-/colour-1.0.0.tgz", - "integrity": "sha512-A5P/LfWGFSl6nsckYtjw9da+19jB8hkJ6ACTGcDfEJ0aE+l2n2El7dsVM7UVHZQ9s2lmYMWlrS21YLy2IR1LUw==", - "engines": { - "node": ">=18" - } - }, - "node_modules/@img/sharp-darwin-arm64": { - "version": "0.34.5", - "resolved": "https://registry.npmjs.org/@img/sharp-darwin-arm64/-/sharp-darwin-arm64-0.34.5.tgz", - "integrity": "sha512-imtQ3WMJXbMY4fxb/Ndp6HBTNVtWCUI0WdobyheGf5+ad6xX8VIDO8u2xE4qc/fr08CKG/7dDseFtn6M6g/r3w==", - "cpu": [ - "arm64" - ], - "optional": true, - "os": [ - "darwin" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - }, - "optionalDependencies": { - "@img/sharp-libvips-darwin-arm64": "1.2.4" - } - }, - "node_modules/@img/sharp-darwin-x64": { - "version": "0.34.5", - "resolved": "https://registry.npmjs.org/@img/sharp-darwin-x64/-/sharp-darwin-x64-0.34.5.tgz", - "integrity": "sha512-YNEFAF/4KQ/PeW0N+r+aVVsoIY0/qxxikF2SWdp+NRkmMB7y9LBZAVqQ4yhGCm/H3H270OSykqmQMKLBhBJDEw==", - "cpu": [ - "x64" - ], - "optional": true, - "os": [ - "darwin" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - }, - "optionalDependencies": { - "@img/sharp-libvips-darwin-x64": "1.2.4" - } - }, - "node_modules/@img/sharp-libvips-darwin-arm64": { - "version": "1.2.4", - "resolved": "https://registry.npmjs.org/@img/sharp-libvips-darwin-arm64/-/sharp-libvips-darwin-arm64-1.2.4.tgz", - "integrity": "sha512-zqjjo7RatFfFoP0MkQ51jfuFZBnVE2pRiaydKJ1G/rHZvnsrHAOcQALIi9sA5co5xenQdTugCvtb1cuf78Vf4g==", - "cpu": [ - "arm64" - ], - "optional": true, - "os": [ - "darwin" - ], - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/@img/sharp-libvips-darwin-x64": { - "version": "1.2.4", - "resolved": "https://registry.npmjs.org/@img/sharp-libvips-darwin-x64/-/sharp-libvips-darwin-x64-1.2.4.tgz", - "integrity": "sha512-1IOd5xfVhlGwX+zXv2N93k0yMONvUlANylbJw1eTah8K/Jtpi15KC+WSiaX/nBmbm2HxRM1gZ0nSdjSsrZbGKg==", - "cpu": [ - "x64" - ], - "optional": true, - "os": [ - "darwin" - ], - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/@img/sharp-libvips-linux-arm": { - "version": "1.2.4", - "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-arm/-/sharp-libvips-linux-arm-1.2.4.tgz", - "integrity": "sha512-bFI7xcKFELdiNCVov8e44Ia4u2byA+l3XtsAj+Q8tfCwO6BQ8iDojYdvoPMqsKDkuoOo+X6HZA0s0q11ANMQ8A==", - "cpu": [ - "arm" - ], - "optional": true, - "os": [ - "linux" - ], - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/@img/sharp-libvips-linux-arm64": { - "version": "1.2.4", - "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-arm64/-/sharp-libvips-linux-arm64-1.2.4.tgz", - "integrity": "sha512-excjX8DfsIcJ10x1Kzr4RcWe1edC9PquDRRPx3YVCvQv+U5p7Yin2s32ftzikXojb1PIFc/9Mt28/y+iRklkrw==", - "cpu": [ - "arm64" - ], - "optional": true, - "os": [ - "linux" - ], - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/@img/sharp-libvips-linux-ppc64": { - "version": "1.2.4", - "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-ppc64/-/sharp-libvips-linux-ppc64-1.2.4.tgz", - "integrity": "sha512-FMuvGijLDYG6lW+b/UvyilUWu5Ayu+3r2d1S8notiGCIyYU/76eig1UfMmkZ7vwgOrzKzlQbFSuQfgm7GYUPpA==", - "cpu": [ - "ppc64" - ], - "optional": true, - "os": [ - "linux" - ], - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/@img/sharp-libvips-linux-riscv64": { - "version": "1.2.4", - "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-riscv64/-/sharp-libvips-linux-riscv64-1.2.4.tgz", - "integrity": "sha512-oVDbcR4zUC0ce82teubSm+x6ETixtKZBh/qbREIOcI3cULzDyb18Sr/Wcyx7NRQeQzOiHTNbZFF1UwPS2scyGA==", - "cpu": [ - "riscv64" - ], - "optional": true, - "os": [ - "linux" - ], - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/@img/sharp-libvips-linux-s390x": { - "version": "1.2.4", - "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-s390x/-/sharp-libvips-linux-s390x-1.2.4.tgz", - "integrity": "sha512-qmp9VrzgPgMoGZyPvrQHqk02uyjA0/QrTO26Tqk6l4ZV0MPWIW6LTkqOIov+J1yEu7MbFQaDpwdwJKhbJvuRxQ==", - "cpu": [ - "s390x" - ], - "optional": true, - "os": [ - "linux" - ], - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/@img/sharp-libvips-linux-x64": { - "version": "1.2.4", - "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-x64/-/sharp-libvips-linux-x64-1.2.4.tgz", - "integrity": "sha512-tJxiiLsmHc9Ax1bz3oaOYBURTXGIRDODBqhveVHonrHJ9/+k89qbLl0bcJns+e4t4rvaNBxaEZsFtSfAdquPrw==", - "cpu": [ - "x64" - ], - "optional": true, - "os": [ - "linux" - ], - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/@img/sharp-libvips-linuxmusl-arm64": { - "version": "1.2.4", - "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linuxmusl-arm64/-/sharp-libvips-linuxmusl-arm64-1.2.4.tgz", - "integrity": "sha512-FVQHuwx1IIuNow9QAbYUzJ+En8KcVm9Lk5+uGUQJHaZmMECZmOlix9HnH7n1TRkXMS0pGxIJokIVB9SuqZGGXw==", - "cpu": [ - "arm64" - ], - "optional": true, - "os": [ - "linux" - ], - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/@img/sharp-libvips-linuxmusl-x64": { - "version": "1.2.4", - "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linuxmusl-x64/-/sharp-libvips-linuxmusl-x64-1.2.4.tgz", - "integrity": "sha512-+LpyBk7L44ZIXwz/VYfglaX/okxezESc6UxDSoyo2Ks6Jxc4Y7sGjpgU9s4PMgqgjj1gZCylTieNamqA1MF7Dg==", - "cpu": [ - "x64" - ], - "optional": true, - "os": [ - "linux" - ], - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/@img/sharp-linux-arm": { - "version": "0.34.5", - "resolved": "https://registry.npmjs.org/@img/sharp-linux-arm/-/sharp-linux-arm-0.34.5.tgz", - "integrity": "sha512-9dLqsvwtg1uuXBGZKsxem9595+ujv0sJ6Vi8wcTANSFpwV/GONat5eCkzQo/1O6zRIkh0m/8+5BjrRr7jDUSZw==", - "cpu": [ - "arm" - ], - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - }, - "optionalDependencies": { - "@img/sharp-libvips-linux-arm": "1.2.4" - } - }, - "node_modules/@img/sharp-linux-arm64": { - "version": "0.34.5", - "resolved": "https://registry.npmjs.org/@img/sharp-linux-arm64/-/sharp-linux-arm64-0.34.5.tgz", - "integrity": "sha512-bKQzaJRY/bkPOXyKx5EVup7qkaojECG6NLYswgktOZjaXecSAeCWiZwwiFf3/Y+O1HrauiE3FVsGxFg8c24rZg==", - "cpu": [ - "arm64" - ], - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - }, - "optionalDependencies": { - "@img/sharp-libvips-linux-arm64": "1.2.4" - } - }, - "node_modules/@img/sharp-linux-ppc64": { - "version": "0.34.5", - "resolved": "https://registry.npmjs.org/@img/sharp-linux-ppc64/-/sharp-linux-ppc64-0.34.5.tgz", - "integrity": "sha512-7zznwNaqW6YtsfrGGDA6BRkISKAAE1Jo0QdpNYXNMHu2+0dTrPflTLNkpc8l7MUP5M16ZJcUvysVWWrMefZquA==", - "cpu": [ - "ppc64" - ], - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - }, - "optionalDependencies": { - "@img/sharp-libvips-linux-ppc64": "1.2.4" - } - }, - "node_modules/@img/sharp-linux-riscv64": { - "version": "0.34.5", - "resolved": "https://registry.npmjs.org/@img/sharp-linux-riscv64/-/sharp-linux-riscv64-0.34.5.tgz", - "integrity": "sha512-51gJuLPTKa7piYPaVs8GmByo7/U7/7TZOq+cnXJIHZKavIRHAP77e3N2HEl3dgiqdD/w0yUfiJnII77PuDDFdw==", - "cpu": [ - "riscv64" - ], - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - }, - "optionalDependencies": { - "@img/sharp-libvips-linux-riscv64": "1.2.4" - } - }, - "node_modules/@img/sharp-linux-s390x": { - "version": "0.34.5", - "resolved": "https://registry.npmjs.org/@img/sharp-linux-s390x/-/sharp-linux-s390x-0.34.5.tgz", - "integrity": "sha512-nQtCk0PdKfho3eC5MrbQoigJ2gd1CgddUMkabUj+rBevs8tZ2cULOx46E7oyX+04WGfABgIwmMC0VqieTiR4jg==", - "cpu": [ - "s390x" - ], - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - }, - "optionalDependencies": { - "@img/sharp-libvips-linux-s390x": "1.2.4" - } - }, - "node_modules/@img/sharp-linux-x64": { - "version": "0.34.5", - "resolved": "https://registry.npmjs.org/@img/sharp-linux-x64/-/sharp-linux-x64-0.34.5.tgz", - "integrity": "sha512-MEzd8HPKxVxVenwAa+JRPwEC7QFjoPWuS5NZnBt6B3pu7EG2Ge0id1oLHZpPJdn3OQK+BQDiw9zStiHBTJQQQQ==", - "cpu": [ - "x64" - ], - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - }, - "optionalDependencies": { - "@img/sharp-libvips-linux-x64": "1.2.4" - } - }, - "node_modules/@img/sharp-linuxmusl-arm64": { - "version": "0.34.5", - "resolved": "https://registry.npmjs.org/@img/sharp-linuxmusl-arm64/-/sharp-linuxmusl-arm64-0.34.5.tgz", - "integrity": "sha512-fprJR6GtRsMt6Kyfq44IsChVZeGN97gTD331weR1ex1c1rypDEABN6Tm2xa1wE6lYb5DdEnk03NZPqA7Id21yg==", - "cpu": [ - "arm64" - ], - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - }, - "optionalDependencies": { - "@img/sharp-libvips-linuxmusl-arm64": "1.2.4" - } - }, - "node_modules/@img/sharp-linuxmusl-x64": { - "version": "0.34.5", - "resolved": "https://registry.npmjs.org/@img/sharp-linuxmusl-x64/-/sharp-linuxmusl-x64-0.34.5.tgz", - "integrity": "sha512-Jg8wNT1MUzIvhBFxViqrEhWDGzqymo3sV7z7ZsaWbZNDLXRJZoRGrjulp60YYtV4wfY8VIKcWidjojlLcWrd8Q==", - "cpu": [ - "x64" - ], - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - }, - "optionalDependencies": { - "@img/sharp-libvips-linuxmusl-x64": "1.2.4" - } - }, - "node_modules/@img/sharp-wasm32": { - "version": "0.34.5", - "resolved": "https://registry.npmjs.org/@img/sharp-wasm32/-/sharp-wasm32-0.34.5.tgz", - "integrity": "sha512-OdWTEiVkY2PHwqkbBI8frFxQQFekHaSSkUIJkwzclWZe64O1X4UlUjqqqLaPbUpMOQk6FBu/HtlGXNblIs0huw==", - "cpu": [ - "wasm32" - ], - "optional": true, - "dependencies": { - "@emnapi/runtime": "^1.7.0" - }, - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/@img/sharp-win32-arm64": { - "version": "0.34.5", - "resolved": "https://registry.npmjs.org/@img/sharp-win32-arm64/-/sharp-win32-arm64-0.34.5.tgz", - "integrity": "sha512-WQ3AgWCWYSb2yt+IG8mnC6Jdk9Whs7O0gxphblsLvdhSpSTtmu69ZG1Gkb6NuvxsNACwiPV6cNSZNzt0KPsw7g==", - "cpu": [ - "arm64" - ], - "optional": true, - "os": [ - "win32" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/@img/sharp-win32-ia32": { - "version": "0.34.5", - "resolved": "https://registry.npmjs.org/@img/sharp-win32-ia32/-/sharp-win32-ia32-0.34.5.tgz", - "integrity": "sha512-FV9m/7NmeCmSHDD5j4+4pNI8Cp3aW+JvLoXcTUo0IqyjSfAZJ8dIUmijx1qaJsIiU+Hosw6xM5KijAWRJCSgNg==", - "cpu": [ - "ia32" - ], - "optional": true, - "os": [ - "win32" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/@img/sharp-win32-x64": { - "version": "0.34.5", - "resolved": "https://registry.npmjs.org/@img/sharp-win32-x64/-/sharp-win32-x64-0.34.5.tgz", - "integrity": "sha512-+29YMsqY2/9eFEiW93eqWnuLcWcufowXewwSNIT6UwZdUUCrM3oFjMWH/Z6/TMmb4hlFenmfAVbpWeup2jryCw==", - "cpu": [ - "x64" - ], - "optional": true, - "os": [ - "win32" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - } - }, "node_modules/@mapbox/node-pre-gyp": { "version": "1.0.11", "resolved": "https://registry.npmjs.org/@mapbox/node-pre-gyp/-/node-pre-gyp-1.0.11.tgz", @@ -1149,6 +701,17 @@ "integrity": "sha512-0ikrnug3/IyneSHqCBeslAhlK2aBfYek1fGo4bP4QnZPmiqSGRK+Oy7ZMisLWkesffJvQ1cqAcBnJC+8+nxIAg==", "dev": true }, + "node_modules/@types/pg": { + "version": "8.15.6", + "resolved": "https://registry.npmjs.org/@types/pg/-/pg-8.15.6.tgz", + "integrity": "sha512-NoaMtzhxOrubeL/7UZuNTrejB4MPAJ0RpxZqXQf2qXuVlTPuG6Y8p4u9dKRaue4yjmC7ZhzVO2/Yyyn25znrPQ==", + "dev": true, + "dependencies": { + "@types/node": "*", + "pg-protocol": "*", + "pg-types": "^2.2.0" + } + }, "node_modules/@types/qs": { "version": "6.14.0", "resolved": "https://registry.npmjs.org/@types/qs/-/qs-6.14.0.tgz", @@ -1411,6 +974,16 @@ "node": ">= 10.0.0" } }, + "node_modules/bl": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/bl/-/bl-4.1.0.tgz", + "integrity": "sha512-1W07cM9gS6DcLperZfFSj+bWLtaPGSOHWhPiGzXmvVJbRLdG82sH/Kn8EtW1VqWVA54AKf2h5k5BbnIbwF3h6w==", + "dependencies": { + "buffer": "^5.5.0", + "inherits": "^2.0.4", + "readable-stream": "^3.4.0" + } + }, "node_modules/block-stream2": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/block-stream2/-/block-stream2-2.1.0.tgz", @@ -1600,6 +1173,18 @@ "node": ">=0.10.0" } }, + "node_modules/color": { + "version": "4.2.3", + "resolved": "https://registry.npmjs.org/color/-/color-4.2.3.tgz", + "integrity": "sha512-1rXeuUUiGGrykh+CeBdu5Ie7OJwinCgQY0bc7GCRxy5xVHy+moaqkpL/jqQq0MtQOeYcrqEz4abc5f0KtU7W4A==", + "dependencies": { + "color-convert": "^2.0.1", + "color-string": "^1.9.0" + }, + "engines": { + "node": ">=12.5.0" + } + }, "node_modules/color-convert": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", @@ -1616,6 +1201,15 @@ "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==" }, + "node_modules/color-string": { + "version": "1.9.1", + "resolved": "https://registry.npmjs.org/color-string/-/color-string-1.9.1.tgz", + "integrity": "sha512-shrVawQFojnZv6xM40anx4CkoDP+fZsw/ZerEMsW/pyzsRbElpsL/DBVW7q3ExxwusdNXI3lXpuhEZkzs8p5Eg==", + "dependencies": { + "color-name": "^1.0.0", + "simple-swizzle": "^0.2.2" + } + }, "node_modules/color-support": { "version": "1.1.3", "resolved": "https://registry.npmjs.org/color-support/-/color-support-1.1.3.tgz", @@ -1746,6 +1340,28 @@ "node": ">=0.10" } }, + "node_modules/decompress-response": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/decompress-response/-/decompress-response-6.0.0.tgz", + "integrity": "sha512-aW35yZM6Bb/4oJlZncMH2LCoZtJXTRxES17vE3hoRiowU2kWHaJKFkSBDnDR+cm9J+9QhXmREyIfv0pji9ejCQ==", + "dependencies": { + "mimic-response": "^3.1.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/deep-extend": { + "version": "0.6.0", + "resolved": "https://registry.npmjs.org/deep-extend/-/deep-extend-0.6.0.tgz", + "integrity": "sha512-LOHxIOaPYdHlJRtCQfDIVZtfw/ufM8+rVj649RIHzcm/vGwQRXFt6OPqIFWsm2XEMrNIEtWR64sY1LEKD2vAOA==", + "engines": { + "node": ">=4.0.0" + } + }, "node_modules/deepmerge": { "version": "4.3.1", "resolved": "https://registry.npmjs.org/deepmerge/-/deepmerge-4.3.1.tgz", @@ -2059,6 +1675,14 @@ "bare-events": "^2.7.0" } }, + "node_modules/expand-template": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/expand-template/-/expand-template-2.0.3.tgz", + "integrity": "sha512-XYfuKMvj4O35f/pOXLObndIRvyQ+/+6AhODh+OKWj9S9498pHHn/IMszH+gt0fBCRWMNfk1ZSp5x3AifmnI2vg==", + "engines": { + "node": ">=6" + } + }, "node_modules/express": { "version": "4.21.2", "resolved": "https://registry.npmjs.org/express/-/express-4.21.2.tgz", @@ -2296,6 +1920,11 @@ "node": ">= 0.6" } }, + "node_modules/fs-constants": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/fs-constants/-/fs-constants-1.0.0.tgz", + "integrity": "sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow==" + }, "node_modules/fs-extra": { "version": "10.1.0", "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-10.1.0.tgz", @@ -2489,6 +2118,11 @@ "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==" }, + "node_modules/github-from-package": { + "version": "0.0.0", + "resolved": "https://registry.npmjs.org/github-from-package/-/github-from-package-0.0.0.tgz", + "integrity": "sha512-SyHy3T1v2NUXn29OsWdxmK6RwHD+vkj3v8en8AOBZ1wBQ/hCAQ5bAQTD02kW4W9tUp/3Qh6J8r9EvntiyCmOOw==" + }, "node_modules/glob": { "version": "7.2.3", "resolved": "https://registry.npmjs.org/glob/-/glob-7.2.3.tgz", @@ -2726,6 +2360,11 @@ "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==" }, + "node_modules/ini": { + "version": "1.3.8", + "resolved": "https://registry.npmjs.org/ini/-/ini-1.3.8.tgz", + "integrity": "sha512-JV/yugV2uzW5iMRSiZAyDtQd+nxtUnjeLt0acNdw98kKLrvuRVyB80tsREOE7yvGVgalhZ6RNXCmEHkUKBKxew==" + }, "node_modules/ip-address": { "version": "10.1.0", "resolved": "https://registry.npmjs.org/ip-address/-/ip-address-10.1.0.tgz", @@ -3113,6 +2752,17 @@ "node": ">= 0.6" } }, + "node_modules/mimic-response": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/mimic-response/-/mimic-response-3.1.0.tgz", + "integrity": "sha512-z0yWI+4FDrrweS8Zmt4Ej5HdJmky15+L2e6Wgn3+iK5fWzb6T3fhNFq2+MeTRb064c6Wr4N/wv0DzQTjNzHNGQ==", + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/minimatch": { "version": "3.1.2", "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", @@ -3124,6 +2774,14 @@ "node": "*" } }, + "node_modules/minimist": { + "version": "1.2.8", + "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz", + "integrity": "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==", + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, "node_modules/minio": { "version": "7.1.3", "resolved": "https://registry.npmjs.org/minio/-/minio-7.1.3.tgz", @@ -3225,6 +2883,11 @@ "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==" }, + "node_modules/napi-build-utils": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/napi-build-utils/-/napi-build-utils-2.0.0.tgz", + "integrity": "sha512-GEbrYkbfF7MoNaoh2iGG84Mnf/WZfB0GdGEsM8wz7Expx/LlWf5U8t9nvJKXSp3qr5IsEbK04cBGhol/KwOsWA==" + }, "node_modules/negotiator": { "version": "0.6.3", "resolved": "https://registry.npmjs.org/negotiator/-/negotiator-0.6.3.tgz", @@ -3241,6 +2904,17 @@ "node": ">= 0.4.0" } }, + "node_modules/node-abi": { + "version": "3.85.0", + "resolved": "https://registry.npmjs.org/node-abi/-/node-abi-3.85.0.tgz", + "integrity": "sha512-zsFhmbkAzwhTft6nd3VxcG0cvJsT70rL+BIGHWVq5fi6MwGrHwzqKaxXE+Hl2GmnGItnDKPPkO5/LQqjVkIdFg==", + "dependencies": { + "semver": "^7.3.5" + }, + "engines": { + "node": ">=10" + } + }, "node_modules/node-addon-api": { "version": "5.1.0", "resolved": "https://registry.npmjs.org/node-addon-api/-/node-addon-api-5.1.0.tgz", @@ -3667,6 +3341,62 @@ "node": ">=0.10.0" } }, + "node_modules/prebuild-install": { + "version": "7.1.3", + "resolved": "https://registry.npmjs.org/prebuild-install/-/prebuild-install-7.1.3.tgz", + "integrity": "sha512-8Mf2cbV7x1cXPUILADGI3wuhfqWvtiLA1iclTDbFRZkgRQS0NqsPZphna9V+HyTEadheuPmjaJMsbzKQFOzLug==", + "dependencies": { + "detect-libc": "^2.0.0", + "expand-template": "^2.0.3", + "github-from-package": "0.0.0", + "minimist": "^1.2.3", + "mkdirp-classic": "^0.5.3", + "napi-build-utils": "^2.0.0", + "node-abi": "^3.3.0", + "pump": "^3.0.0", + "rc": "^1.2.7", + "simple-get": "^4.0.0", + "tar-fs": "^2.0.0", + "tunnel-agent": "^0.6.0" + }, + "bin": { + "prebuild-install": "bin.js" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/prebuild-install/node_modules/chownr": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/chownr/-/chownr-1.1.4.tgz", + "integrity": "sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg==" + }, + "node_modules/prebuild-install/node_modules/tar-fs": { + "version": "2.1.4", + "resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-2.1.4.tgz", + "integrity": "sha512-mDAjwmZdh7LTT6pNleZ05Yt65HC3E+NiQzl672vQG38jIrehtJk/J3mNwIg+vShQPcLF/LV7CMnDW6vjj6sfYQ==", + "dependencies": { + "chownr": "^1.1.1", + "mkdirp-classic": "^0.5.2", + "pump": "^3.0.0", + "tar-stream": "^2.1.4" + } + }, + "node_modules/prebuild-install/node_modules/tar-stream": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-2.2.0.tgz", + "integrity": "sha512-ujeqbceABgwMZxEJnk2HDY2DlnUZ+9oEcb1KzTVfYHio0UE6dG71n60d8D2I4qNvleWrrXpmjpt7vZeF1LnMZQ==", + "dependencies": { + "bl": "^4.0.3", + "end-of-stream": "^1.4.1", + "fs-constants": "^1.0.0", + "inherits": "^2.0.3", + "readable-stream": "^3.1.1" + }, + "engines": { + "node": ">=6" + } + }, "node_modules/progress": { "version": "2.0.3", "resolved": "https://registry.npmjs.org/progress/-/progress-2.0.3.tgz", @@ -4092,6 +3822,20 @@ "node": ">= 0.8" } }, + "node_modules/rc": { + "version": "1.2.8", + "resolved": "https://registry.npmjs.org/rc/-/rc-1.2.8.tgz", + "integrity": "sha512-y3bGgqKj3QBdxLbLkomlohkvsA8gdAiUQlSBJnBhfn+BPxg4bc62d8TcBW15wavDfgexCgccckhcZvywyQYPOw==", + "dependencies": { + "deep-extend": "^0.6.0", + "ini": "~1.3.0", + "minimist": "^1.2.0", + "strip-json-comments": "~2.0.1" + }, + "bin": { + "rc": "cli.js" + } + }, "node_modules/readable-stream": { "version": "3.6.2", "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz", @@ -4311,49 +4055,32 @@ } }, "node_modules/sharp": { - "version": "0.34.5", - "resolved": "https://registry.npmjs.org/sharp/-/sharp-0.34.5.tgz", - "integrity": "sha512-Ou9I5Ft9WNcCbXrU9cMgPBcCK8LiwLqcbywW3t4oDV37n1pzpuNLsYiAV8eODnjbtQlSDwZ2cUEeQz4E54Hltg==", + "version": "0.32.6", + "resolved": "https://registry.npmjs.org/sharp/-/sharp-0.32.6.tgz", + "integrity": "sha512-KyLTWwgcR9Oe4d9HwCwNM2l7+J0dUQwn/yf7S0EnTtb0eVS4RxO0eUSvxPtzT4F3SY+C4K6fqdv/DO27sJ/v/w==", "hasInstallScript": true, - "license": "Apache-2.0", "dependencies": { - "@img/colour": "^1.0.0", - "detect-libc": "^2.1.2", - "semver": "^7.7.3" + "color": "^4.2.3", + "detect-libc": "^2.0.2", + "node-addon-api": "^6.1.0", + "prebuild-install": "^7.1.1", + "semver": "^7.5.4", + "simple-get": "^4.0.1", + "tar-fs": "^3.0.4", + "tunnel-agent": "^0.6.0" }, "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" + "node": ">=14.15.0" }, "funding": { "url": "https://opencollective.com/libvips" - }, - "optionalDependencies": { - "@img/sharp-darwin-arm64": "0.34.5", - "@img/sharp-darwin-x64": "0.34.5", - "@img/sharp-libvips-darwin-arm64": "1.2.4", - "@img/sharp-libvips-darwin-x64": "1.2.4", - "@img/sharp-libvips-linux-arm": "1.2.4", - "@img/sharp-libvips-linux-arm64": "1.2.4", - "@img/sharp-libvips-linux-ppc64": "1.2.4", - "@img/sharp-libvips-linux-riscv64": "1.2.4", - "@img/sharp-libvips-linux-s390x": "1.2.4", - "@img/sharp-libvips-linux-x64": "1.2.4", - "@img/sharp-libvips-linuxmusl-arm64": "1.2.4", - "@img/sharp-libvips-linuxmusl-x64": "1.2.4", - "@img/sharp-linux-arm": "0.34.5", - "@img/sharp-linux-arm64": "0.34.5", - "@img/sharp-linux-ppc64": "0.34.5", - "@img/sharp-linux-riscv64": "0.34.5", - "@img/sharp-linux-s390x": "0.34.5", - "@img/sharp-linux-x64": "0.34.5", - "@img/sharp-linuxmusl-arm64": "0.34.5", - "@img/sharp-linuxmusl-x64": "0.34.5", - "@img/sharp-wasm32": "0.34.5", - "@img/sharp-win32-arm64": "0.34.5", - "@img/sharp-win32-ia32": "0.34.5", - "@img/sharp-win32-x64": "0.34.5" } }, + "node_modules/sharp/node_modules/node-addon-api": { + "version": "6.1.0", + "resolved": "https://registry.npmjs.org/node-addon-api/-/node-addon-api-6.1.0.tgz", + "integrity": "sha512-+eawOlIgy680F0kBzPUNFhMZGtJ1YmqM6l4+Crf4IkImjYrO/mqPwRMh352g23uIaQKFItcQ64I7KMaJxHgAVA==" + }, "node_modules/side-channel": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/side-channel/-/side-channel-1.1.0.tgz", @@ -4427,6 +4154,62 @@ "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-3.0.7.tgz", "integrity": "sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ==" }, + "node_modules/simple-concat": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/simple-concat/-/simple-concat-1.0.1.tgz", + "integrity": "sha512-cSFtAPtRhljv69IK0hTVZQ+OfE9nePi/rtJmw5UjHeVyVroEqJXP1sFztKUy1qU+xvz3u/sfYJLa947b7nAN2Q==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ] + }, + "node_modules/simple-get": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/simple-get/-/simple-get-4.0.1.tgz", + "integrity": "sha512-brv7p5WgH0jmQJr1ZDDfKDOSeWWg+OVypG99A/5vYGPqJ6pxiaHLy8nxtFjBA7oMa01ebA9gfh1uMCFqOuXxvA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "dependencies": { + "decompress-response": "^6.0.0", + "once": "^1.3.1", + "simple-concat": "^1.0.0" + } + }, + "node_modules/simple-swizzle": { + "version": "0.2.4", + "resolved": "https://registry.npmjs.org/simple-swizzle/-/simple-swizzle-0.2.4.tgz", + "integrity": "sha512-nAu1WFPQSMNr2Zn9PGSZK9AGn4t/y97lEm+MXTtUDwfP0ksAIX4nO+6ruD9Jwut4C49SB1Ws+fbXsm/yScWOHw==", + "dependencies": { + "is-arrayish": "^0.3.1" + } + }, + "node_modules/simple-swizzle/node_modules/is-arrayish": { + "version": "0.3.4", + "resolved": "https://registry.npmjs.org/is-arrayish/-/is-arrayish-0.3.4.tgz", + "integrity": "sha512-m6UrgzFVUYawGBh1dUsWR5M2Clqic9RVXC/9f8ceNlv2IcO9j9J/z8UoCLPqtsPBFNzEpfR3xftohbfqDx8EQA==" + }, "node_modules/smart-buffer": { "version": "4.2.0", "resolved": "https://registry.npmjs.org/smart-buffer/-/smart-buffer-4.2.0.tgz", @@ -4566,6 +4349,14 @@ "node": ">=8" } }, + "node_modules/strip-json-comments": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-2.0.1.tgz", + "integrity": "sha512-4gB8na07fecVVkOI6Rs4e7T6NOTki5EmL7TUduTs6bu3EdnSycntVJ4re8kgZA+wx9IueI2Y11bfbgwtzuE0KQ==", + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/strnum": { "version": "1.1.2", "resolved": "https://registry.npmjs.org/strnum/-/strnum-1.1.2.tgz", @@ -4671,6 +4462,17 @@ "fsevents": "~2.3.3" } }, + "node_modules/tunnel-agent": { + "version": "0.6.0", + "resolved": "https://registry.npmjs.org/tunnel-agent/-/tunnel-agent-0.6.0.tgz", + "integrity": "sha512-McnNiV1l8RYeY8tBgEpuodCC1mLUdbSN+CYBL7kJsJNInOP8UjDDEwdk6Mw60vdLLrr5NHKZhMAOSrR2NZuQ+w==", + "dependencies": { + "safe-buffer": "^5.0.1" + }, + "engines": { + "node": "*" + } + }, "node_modules/type-is": { "version": "1.6.18", "resolved": "https://registry.npmjs.org/type-is/-/type-is-1.6.18.tgz", diff --git a/backend/package.json b/backend/package.json index 032ce072..9706a1d0 100755 --- a/backend/package.json +++ b/backend/package.json @@ -29,7 +29,7 @@ "puppeteer": "^21.0.0", "puppeteer-extra": "^3.3.6", "puppeteer-extra-plugin-stealth": "^2.11.2", - "sharp": "^0.34.5", + "sharp": "^0.32.0", "socks-proxy-agent": "^8.0.2", "uuid": "^9.0.1", "zod": "^3.22.4" @@ -41,6 +41,7 @@ "@types/jsonwebtoken": "^9.0.5", "@types/node": "^20.10.5", "@types/node-cron": "^3.0.11", + "@types/pg": "^8.15.6", "@types/uuid": "^9.0.7", "tsx": "^4.7.0", "typescript": "^5.3.3" diff --git a/backend/src/index.ts b/backend/src/index.ts index 56aa55d1..3a2f55e7 100755 --- a/backend/src/index.ts +++ b/backend/src/index.ts @@ -1,7 +1,8 @@ import express from 'express'; import cors from 'cors'; +import path from 'path'; import dotenv from 'dotenv'; -import { initializeMinio } from './utils/minio'; +import { initializeMinio, isMinioEnabled } from './utils/minio'; import { logger } from './services/logger'; import { cleanupOrphanedJobs } from './services/proxyTestQueue'; @@ -13,10 +14,25 @@ const PORT = process.env.PORT || 3010; app.use(cors()); app.use(express.json()); +// Serve static images when MinIO is not configured +const LOCAL_IMAGES_PATH = process.env.LOCAL_IMAGES_PATH || '/app/public/images'; +app.use('/images', express.static(LOCAL_IMAGES_PATH)); + app.get('/health', (req, res) => { res.json({ status: 'ok', timestamp: new Date().toISOString() }); }); +// Endpoint to check server's outbound IP (for proxy whitelist setup) +app.get('/outbound-ip', async (req, res) => { + try { + const axios = require('axios'); + const response = await axios.get('https://api.ipify.org?format=json', { timeout: 10000 }); + res.json({ outbound_ip: response.data.ip }); + } catch (error: any) { + res.status(500).json({ error: error.message }); + } +}); + import authRoutes from './routes/auth'; import dashboardRoutes from './routes/dashboard'; import storesRoutes from './routes/stores'; @@ -32,6 +48,7 @@ import logsRoutes from './routes/logs'; import scraperMonitorRoutes from './routes/scraper-monitor'; import apiTokensRoutes from './routes/api-tokens'; import apiPermissionsRoutes from './routes/api-permissions'; +import parallelScrapeRoutes from './routes/parallel-scrape'; import { trackApiUsage, checkRateLimit } from './middleware/apiTokenTracker'; import { validateWordPressPermissions } from './middleware/wordpressPermissions'; @@ -57,13 +74,14 @@ app.use('/api/logs', logsRoutes); app.use('/api/scraper-monitor', scraperMonitorRoutes); app.use('/api/api-tokens', apiTokensRoutes); app.use('/api/api-permissions', apiPermissionsRoutes); +app.use('/api/parallel-scrape', parallelScrapeRoutes); async function startServer() { try { logger.info('system', 'Starting server...'); await initializeMinio(); - logger.info('system', 'Minio initialized'); + logger.info('system', isMinioEnabled() ? 'MinIO storage initialized' : 'Local filesystem storage initialized'); // Clean up any orphaned proxy test jobs from previous server runs await cleanupOrphanedJobs(); diff --git a/backend/src/middleware/wordpressPermissions.ts b/backend/src/middleware/wordpressPermissions.ts index b356612e..66db2f89 100644 --- a/backend/src/middleware/wordpressPermissions.ts +++ b/backend/src/middleware/wordpressPermissions.ts @@ -161,7 +161,7 @@ export async function validateWordPressPermissions( UPDATE wp_dutchie_api_permissions SET last_used_at = CURRENT_TIMESTAMP WHERE id = $1 - `, [permission.id]).catch(err => { + `, [permission.id]).catch((err: Error) => { console.error('Error updating last_used_at:', err); }); diff --git a/backend/src/routes/categories.ts b/backend/src/routes/categories.ts index 21908b0d..7d61e918 100644 --- a/backend/src/routes/categories.ts +++ b/backend/src/routes/categories.ts @@ -67,12 +67,12 @@ router.get('/tree', async (req, res) => { const tree: any[] = []; // First pass: create map - categories.forEach(cat => { + categories.forEach((cat: { id: number; parent_id?: number }) => { categoryMap.set(cat.id, { ...cat, children: [] }); }); - + // Second pass: build tree - categories.forEach(cat => { + categories.forEach((cat: { id: number; parent_id?: number }) => { const node = categoryMap.get(cat.id); if (cat.parent_id) { const parent = categoryMap.get(cat.parent_id); diff --git a/backend/src/routes/parallel-scrape.ts b/backend/src/routes/parallel-scrape.ts new file mode 100644 index 00000000..aa057a81 --- /dev/null +++ b/backend/src/routes/parallel-scrape.ts @@ -0,0 +1,252 @@ +import { Router } from 'express'; +import { pool } from '../db/migrate'; +import { getActiveProxy, putProxyInTimeout, isBotDetectionError } from '../services/proxy'; +import { authMiddleware } from '../auth/middleware'; + +const router = Router(); +router.use(authMiddleware); + +const FIREFOX_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0'; + +interface ScrapeJob { + id: string; + storeName: string; + status: 'running' | 'completed' | 'failed'; + workers: number; + startedAt: Date; + completedAt?: Date; + results: { + category: string; + success: boolean; + products: number; + error?: string; + }[]; +} + +// In-memory job tracking +const activeJobs = new Map(); + +// Get job status +router.get('/status/:jobId', (req, res) => { + const job = activeJobs.get(req.params.jobId); + if (!job) { + return res.status(404).json({ error: 'Job not found' }); + } + res.json(job); +}); + +// List active jobs +router.get('/jobs', (req, res) => { + const jobs = Array.from(activeJobs.values()); + res.json({ jobs }); +}); + +// Start parallel scrape +router.post('/start', async (req, res) => { + const { storeName = 'Deeply Rooted', workers = 15, useProxies = true } = req.body; + + try { + // Find the store + const storeResult = await pool.query( + `SELECT id, name, slug, dutchie_url FROM stores WHERE name ILIKE $1 LIMIT 1`, + [`%${storeName}%`] + ); + + if (storeResult.rows.length === 0) { + return res.status(404).json({ error: `Store not found: ${storeName}` }); + } + + const store = storeResult.rows[0]; + + // Get categories + const categoriesResult = await pool.query( + `SELECT id, name, slug, dutchie_url as url FROM categories WHERE store_id = $1 AND scrape_enabled = true`, + [store.id] + ); + + if (categoriesResult.rows.length === 0) { + return res.status(404).json({ error: 'No categories found for this store' }); + } + + const categories = categoriesResult.rows; + + // Create job + const jobId = `scrape-${Date.now()}`; + const job: ScrapeJob = { + id: jobId, + storeName: store.name, + status: 'running', + workers, + startedAt: new Date(), + results: [] + }; + activeJobs.set(jobId, job); + + // Start scraping in background + runParallelScrape(job, store, categories, workers, useProxies).catch(err => { + console.error('Parallel scrape error:', err); + job.status = 'failed'; + }); + + res.json({ + message: 'Parallel scrape started', + jobId, + store: store.name, + categories: categories.length, + workers + }); + + } catch (error: any) { + console.error('Failed to start parallel scrape:', error); + res.status(500).json({ error: error.message }); + } +}); + +async function runParallelScrape( + job: ScrapeJob, + store: any, + categories: any[], + numWorkers: number, + useProxies: boolean +) { + const puppeteer = require('puppeteer-extra'); + const StealthPlugin = require('puppeteer-extra-plugin-stealth'); + puppeteer.use(StealthPlugin()); + + // Expand categories for multiple passes + const expandedCategories: any[] = []; + const passes = Math.ceil(numWorkers / Math.max(categories.length, 1)); + for (let i = 0; i < passes; i++) { + expandedCategories.push(...categories); + } + + const categoryIndex = { current: 0 }; + + const worker = async (workerId: number) => { + while (categoryIndex.current < expandedCategories.length) { + const idx = categoryIndex.current++; + const category = expandedCategories[idx]; + if (!category) break; + + const result = await scrapeCategory(puppeteer, workerId, category, useProxies); + job.results.push({ + category: category.name, + success: result.success, + products: result.products, + error: result.error + }); + + // Delay between requests + await new Promise(resolve => setTimeout(resolve, 2000 + Math.random() * 3000)); + } + }; + + // Start workers with staggered starts + const workers: Promise[] = []; + for (let i = 0; i < numWorkers; i++) { + workers.push(worker(i + 1)); + await new Promise(resolve => setTimeout(resolve, 500)); + } + + await Promise.all(workers); + + job.status = 'completed'; + job.completedAt = new Date(); + + // Clean up job after 1 hour + setTimeout(() => activeJobs.delete(job.id), 60 * 60 * 1000); +} + +async function scrapeCategory( + puppeteer: any, + workerId: number, + category: any, + useProxies: boolean +): Promise<{ success: boolean; products: number; error?: string }> { + let browser = null; + let proxyId: number | null = null; + + try { + let proxy = null; + if (useProxies) { + proxy = await getActiveProxy(); + } + + const args = [ + '--no-sandbox', + '--disable-setuid-sandbox', + '--disable-dev-shm-usage', + '--disable-accelerated-2d-canvas', + '--disable-gpu', + '--window-size=1920,1080', + ]; + + if (proxy) { + proxyId = proxy.id; + if (proxy.protocol === 'socks5' || proxy.protocol === 'socks') { + args.push(`--proxy-server=socks5://${proxy.host}:${proxy.port}`); + } else { + args.push(`--proxy-server=${proxy.protocol}://${proxy.host}:${proxy.port}`); + } + } + + browser = await puppeteer.launch({ + headless: 'new', + args, + executablePath: process.env.PUPPETEER_EXECUTABLE_PATH || '/usr/bin/chromium', + }); + + const page = await browser.newPage(); + await page.setUserAgent(FIREFOX_USER_AGENT); + await page.setViewport({ width: 1920, height: 1080 }); + + if (proxy?.username && proxy?.password) { + await page.authenticate({ + username: proxy.username, + password: proxy.password, + }); + } + + console.log(`[Worker ${workerId}] Scraping: ${category.name} (${category.url})`); + + const response = await page.goto(category.url, { + waitUntil: 'networkidle2', + timeout: 60000, + }); + + if (!response || !response.ok()) { + throw new Error(`Failed to load page: ${response?.status()}`); + } + + await page.waitForSelector('[data-testid="product-list-item"], a[href*="/product/"]', { + timeout: 30000, + }).catch(() => {}); + + const products = await page.evaluate(() => { + // Try data-testid first, then fall back to product links + const listItems = document.querySelectorAll('[data-testid="product-list-item"]'); + if (listItems.length > 0) return listItems.length; + return document.querySelectorAll('a[href*="/product/"]').length; + }); + + console.log(`[Worker ${workerId}] Found ${products} products in ${category.name}`); + + await browser.close(); + return { success: true, products }; + + } catch (error: any) { + console.error(`[Worker ${workerId}] Error:`, error.message); + + if (proxyId && isBotDetectionError(error.message)) { + putProxyInTimeout(proxyId, error.message); + } + + if (browser) { + await browser.close().catch(() => {}); + } + + return { success: false, products: 0, error: error.message }; + } +} + +export default router; diff --git a/backend/src/routes/products.ts b/backend/src/routes/products.ts index 098daa65..9a441fcd 100755 --- a/backend/src/routes/products.ts +++ b/backend/src/routes/products.ts @@ -136,17 +136,17 @@ router.get('/', async (req, res) => { const result = await pool.query(query, params); // Add image URLs - let products = result.rows.map(p => ({ + let products = result.rows.map((p: Record) => ({ ...p, - image_url_full: p.local_image_path ? getImageUrl(p.local_image_path) : p.image_url, - thumbnail_url: p.thumbnail_path ? getImageUrl(p.thumbnail_path) : null, - medium_url: p.medium_path ? getImageUrl(p.medium_path) : null, + image_url_full: p.local_image_path ? getImageUrl(p.local_image_path as string) : p.image_url, + thumbnail_url: p.thumbnail_path ? getImageUrl(p.thumbnail_path as string) : null, + medium_url: p.medium_path ? getImageUrl(p.medium_path as string) : null, })); // Field selection if (fields) { const selectedFields = (fields as string).split(',').map(f => f.trim()); - products = products.map(p => selectFields(p, selectedFields)); + products = products.map((p: Record) => selectFields(p, selectedFields)); } // Get total count (reuse same filters) @@ -300,7 +300,7 @@ router.get('/meta/brands', async (req, res) => { query += ' ORDER BY brand'; const result = await pool.query(query, params); - const brands = result.rows.map(row => row.brand); + const brands = result.rows.map((row: { brand: string }) => row.brand); res.json({ brands }); } catch (error) { diff --git a/backend/src/routes/scraper-monitor.ts b/backend/src/routes/scraper-monitor.ts index f2a62f55..5d4d7bb9 100644 --- a/backend/src/routes/scraper-monitor.ts +++ b/backend/src/routes/scraper-monitor.ts @@ -21,6 +21,8 @@ interface ActiveScraper { itemsSaved: number; itemsDropped: number; errorsCount: number; + productsProcessed?: number; + productsTotal?: number; }; currentActivity?: string; } @@ -200,7 +202,7 @@ router.get('/jobs/stats', async (req, res) => { total_products_saved: 0 }; - result.rows.forEach(row => { + result.rows.forEach((row: { status: string; count: string; total_products_found?: string; total_products_saved?: string }) => { stats[row.status as keyof typeof stats] = parseInt(row.count); if (row.status === 'completed') { stats.total_products_found = parseInt(row.total_products_found || '0'); diff --git a/backend/src/scraper-v2/engine.ts b/backend/src/scraper-v2/engine.ts index bfa4f3d7..2c61ebfd 100644 --- a/backend/src/scraper-v2/engine.ts +++ b/backend/src/scraper-v2/engine.ts @@ -365,7 +365,7 @@ export class DutchieSpider { logger.error('scraper', `Category scrape failed: ${error}`); if (completeScraper) { - completeScraper(scraperId, error.toString()); + completeScraper(scraperId, String(error)); } throw error; diff --git a/backend/src/scraper-v2/index.ts b/backend/src/scraper-v2/index.ts index 74857eab..c994750e 100644 --- a/backend/src/scraper-v2/index.ts +++ b/backend/src/scraper-v2/index.ts @@ -58,7 +58,7 @@ export async function scrapeCategory(storeId: number, categoryId: number): Promi /** * Scrape an entire store */ -export async function scrapeStore(storeId: number, parallel: number = 3): Promise { +export async function scrapeStore(storeId: number, parallel: number = 3, _userAgent?: string): Promise { const engine = new ScraperEngine(1); const spider = new DutchieSpider(engine); diff --git a/backend/src/scripts/parallel-scrape.ts b/backend/src/scripts/parallel-scrape.ts new file mode 100644 index 00000000..76a86b41 --- /dev/null +++ b/backend/src/scripts/parallel-scrape.ts @@ -0,0 +1,241 @@ +import { pool } from '../db/migrate'; +import { getActiveProxy, putProxyInTimeout, isBotDetectionError } from '../services/proxy'; +import puppeteer from 'puppeteer-extra'; +import StealthPlugin from 'puppeteer-extra-plugin-stealth'; + +puppeteer.use(StealthPlugin()); + +const FIREFOX_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0'; +const NUM_WORKERS = parseInt(process.argv[2] || '15'); +const DISPENSARY_NAME = process.argv[3] || 'Deeply Rooted'; +const USE_PROXIES = process.argv[4] !== 'no-proxy'; + +interface Category { + id: number; + name: string; + slug: string; + url: string; +} + +interface Store { + id: number; + name: string; + slug: string; + dutchie_url: string; +} + +async function getStore(name: string): Promise { + const result = await pool.query( + `SELECT id, name, slug, dutchie_url FROM stores WHERE name ILIKE $1 LIMIT 1`, + [`%${name}%`] + ); + return result.rows[0] || null; +} + +async function getCategories(storeId: number): Promise { + const result = await pool.query( + `SELECT id, name, slug, dutchie_url as url FROM categories WHERE store_id = $1 AND scrape_enabled = true`, + [storeId] + ); + return result.rows; +} + +async function scrapeWithProxy( + workerId: number, + store: Store, + category: Category +): Promise<{ success: boolean; products: number; error?: string }> { + let browser = null; + let proxyId: number | null = null; + + try { + // Get a proxy (if enabled) + let proxy = null; + if (USE_PROXIES) { + proxy = await getActiveProxy(); + if (proxy) { + proxyId = proxy.id; + console.log(`[Worker ${workerId}] Using proxy: ${proxy.protocol}://${proxy.host}:${proxy.port}`); + } else { + console.log(`[Worker ${workerId}] No proxy available, using direct connection`); + } + } else { + console.log(`[Worker ${workerId}] Direct connection (proxies disabled)`); + } + + // Build browser args + const args = [ + '--no-sandbox', + '--disable-setuid-sandbox', + '--disable-dev-shm-usage', + '--disable-accelerated-2d-canvas', + '--disable-gpu', + '--window-size=1920,1080', + ]; + + if (proxy) { + if (proxy.protocol === 'socks5' || proxy.protocol === 'socks') { + args.push(`--proxy-server=socks5://${proxy.host}:${proxy.port}`); + } else { + args.push(`--proxy-server=${proxy.protocol}://${proxy.host}:${proxy.port}`); + } + } + + browser = await puppeteer.launch({ + headless: true, + args, + executablePath: process.env.PUPPETEER_EXECUTABLE_PATH, + }); + + const page = await browser.newPage(); + await page.setUserAgent(FIREFOX_USER_AGENT); + await page.setViewport({ width: 1920, height: 1080 }); + + // Handle proxy auth if needed + if (proxy?.username && proxy?.password) { + await page.authenticate({ + username: proxy.username, + password: proxy.password, + }); + } + + console.log(`[Worker ${workerId}] Scraping category: ${category.name} (${category.url})`); + + // Navigate to the category page + const response = await page.goto(category.url, { + waitUntil: 'networkidle2', + timeout: 60000, + }); + + if (!response || !response.ok()) { + throw new Error(`Failed to load page: ${response?.status()}`); + } + + // Wait for products to load + await page.waitForSelector('[data-testid="product-list-item"], a[href*="/product/"]', { + timeout: 30000, + }).catch(() => { + console.log(`[Worker ${workerId}] No products found on page`); + }); + + // Extract products + const products = await page.evaluate(() => { + // Try data-testid first, then fall back to product links + const listItems = document.querySelectorAll('[data-testid="product-list-item"]'); + if (listItems.length > 0) return listItems.length; + return document.querySelectorAll('a[href*="/product/"]').length; + }); + + console.log(`[Worker ${workerId}] Found ${products} products in ${category.name}`); + + await browser.close(); + return { success: true, products }; + + } catch (error: any) { + console.error(`[Worker ${workerId}] Error:`, error.message); + + // Check for bot detection + if (proxyId && isBotDetectionError(error.message)) { + putProxyInTimeout(proxyId, error.message); + } + + if (browser) { + await browser.close().catch(() => {}); + } + + return { success: false, products: 0, error: error.message }; + } +} + +async function worker( + workerId: number, + store: Store, + categories: Category[], + categoryIndex: { current: number } +): Promise { + while (categoryIndex.current < categories.length) { + const idx = categoryIndex.current++; + const category = categories[idx]; + + if (!category) break; + + console.log(`[Worker ${workerId}] Starting category ${idx + 1}/${categories.length}: ${category.name}`); + + const result = await scrapeWithProxy(workerId, store, category); + + if (result.success) { + console.log(`[Worker ${workerId}] Completed ${category.name}: ${result.products} products`); + } else { + console.log(`[Worker ${workerId}] Failed ${category.name}: ${result.error}`); + } + + // Small delay between requests + await new Promise(resolve => setTimeout(resolve, 2000 + Math.random() * 3000)); + } + + console.log(`[Worker ${workerId}] Finished all assigned work`); +} + +async function main() { + console.log(`\n${'='.repeat(60)}`); + console.log(`Parallel Scraper - ${NUM_WORKERS} workers`); + console.log(`Target: ${DISPENSARY_NAME}`); + console.log(`User Agent: Firefox`); + console.log(`Proxies: ${USE_PROXIES ? 'Enabled' : 'Disabled'}`); + console.log(`${'='.repeat(60)}\n`); + + // Find the store + const store = await getStore(DISPENSARY_NAME); + if (!store) { + console.error(`Store not found: ${DISPENSARY_NAME}`); + process.exit(1); + } + + console.log(`Found store: ${store.name} (ID: ${store.id})`); + + // Get categories + const categories = await getCategories(store.id); + if (categories.length === 0) { + console.error('No categories found for this store'); + process.exit(1); + } + + console.log(`Found ${categories.length} categories to scrape`); + console.log(`Categories: ${categories.map(c => c.name).join(', ')}\n`); + + // Check proxies + const proxyResult = await pool.query('SELECT COUNT(*) as total, COUNT(*) FILTER (WHERE active = true) as active FROM proxies'); + console.log(`Proxies: ${proxyResult.rows[0].active} active / ${proxyResult.rows[0].total} total\n`); + + // Shared index for work distribution + const categoryIndex = { current: 0 }; + + // For a store with few categories, we'll run multiple passes + // Expand the work by duplicating categories for parallel workers + const expandedCategories: Category[] = []; + const passes = Math.ceil(NUM_WORKERS / Math.max(categories.length, 1)); + for (let i = 0; i < passes; i++) { + expandedCategories.push(...categories); + } + + console.log(`Running ${NUM_WORKERS} workers across ${expandedCategories.length} category scrapes\n`); + + // Start workers + const workers: Promise[] = []; + for (let i = 0; i < NUM_WORKERS; i++) { + workers.push(worker(i + 1, store, expandedCategories, categoryIndex)); + // Stagger worker starts + await new Promise(resolve => setTimeout(resolve, 500)); + } + + // Wait for all workers + await Promise.all(workers); + + console.log(`\n${'='.repeat(60)}`); + console.log('All workers completed!'); + console.log(`${'='.repeat(60)}\n`); + + await pool.end(); +} + +main().catch(console.error); diff --git a/backend/src/services/category-discovery.ts b/backend/src/services/category-discovery.ts index a847cc93..d68b0a7c 100644 --- a/backend/src/services/category-discovery.ts +++ b/backend/src/services/category-discovery.ts @@ -4,7 +4,7 @@ import { Browser, Page } from 'puppeteer'; import { pool } from '../db/migrate'; import { logger } from './logger'; import { bypassAgeGate, detectStateFromUrl, setAgeGateCookies } from '../utils/age-gate'; -import { dutchieTemplate } from './scrapers/templates/dutchie'; +import { dutchieTemplate } from '../scrapers/templates/dutchie'; // Apply stealth plugin puppeteer.use(StealthPlugin()); diff --git a/backend/src/services/logger.ts b/backend/src/services/logger.ts index 69138fe0..1310be95 100644 --- a/backend/src/services/logger.ts +++ b/backend/src/services/logger.ts @@ -1,7 +1,7 @@ interface LogEntry { timestamp: Date; level: 'info' | 'error' | 'warn' | 'debug'; - category: 'scraper' | 'images' | 'categories' | 'system' | 'api' | 'pipeline'; + category: 'scraper' | 'images' | 'categories' | 'system' | 'api' | 'pipeline' | 'age-gate' | 'proxy'; message: string; } diff --git a/backend/src/services/proxy.ts b/backend/src/services/proxy.ts index 757e6997..bc3aeca7 100755 --- a/backend/src/services/proxy.ts +++ b/backend/src/services/proxy.ts @@ -91,8 +91,8 @@ async function getSettings(): Promise<{ timeout: number; testUrl: string }> { WHERE key IN ('proxy_timeout_ms', 'proxy_test_url') `); - const settings: any = {}; - result.rows.forEach(row => { + const settings: Record = {}; + result.rows.forEach((row: { key: string; value: string }) => { settings[row.key] = row.value; }); diff --git a/backend/src/services/scheduler.ts b/backend/src/services/scheduler.ts index b16a2df9..3a88b31a 100755 --- a/backend/src/services/scheduler.ts +++ b/backend/src/services/scheduler.ts @@ -13,8 +13,8 @@ async function getSettings(): Promise<{ WHERE key IN ('scrape_interval_hours', 'scrape_specials_time') `); - const settings: any = {}; - result.rows.forEach(row => { + const settings: Record = {}; + result.rows.forEach((row: { key: string; value: string }) => { settings[row.key] = row.value; }); diff --git a/backend/src/services/scraper.ts b/backend/src/services/scraper.ts index 7189bc9e..314ad0b3 100755 --- a/backend/src/services/scraper.ts +++ b/backend/src/services/scraper.ts @@ -385,13 +385,20 @@ export async function scrapeCategory(storeId: number, categoryId: number, userAg try { await page.goto(category.dutchie_url, { - waitUntil: 'domcontentloaded', + waitUntil: 'networkidle2', timeout: 60000 }); // If age gate still appears, try to bypass it await bypassAgeGate(page, state); + // Wait for products to load + await page.waitForSelector('[data-testid="product-list-item"], a[href*="/product/"]', { + timeout: 30000, + }).catch(() => { + logger.warn('scraper', 'No product selectors found, trying anyway...'); + }); + logger.info('scraper', 'Scrolling to load all products...'); await autoScroll(page); @@ -471,7 +478,7 @@ export async function scrapeCategory(storeId: number, categoryId: number, userAg } } - const linkEl = card.querySelector('a[href*="/product/"]'); + const linkEl = card.querySelector('a[href*="/product/"]') as HTMLAnchorElement | null; let href = linkEl?.href || linkEl?.getAttribute('href') || ''; if (href && href.startsWith('/')) { href = 'https://dutchie.com' + href; @@ -696,15 +703,24 @@ export async function saveProducts(storeId: number, categoryId: number, products JSON.stringify(product.metadata), productId ]); } else { + // Generate unique slug from product name + timestamp + random suffix + const baseSlug = product.name + .toLowerCase() + .replace(/[^a-z0-9]+/g, '-') + .replace(/^-|-$/g, '') + .substring(0, 150); + const uniqueSuffix = `${Date.now()}-${Math.random().toString(36).substr(2, 6)}`; + const slug = `${baseSlug}-${uniqueSuffix}`; + const insertResult = await client.query(` INSERT INTO products ( - store_id, category_id, dutchie_product_id, name, variant, description, + store_id, category_id, dutchie_product_id, name, slug, variant, description, price, strain_type, thc_percentage, cbd_percentage, brand, weight, image_url, dutchie_url, in_stock, metadata - ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, true, $15) + ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, true, $16) RETURNING id `, [ - storeId, categoryId, product.dutchieProductId, product.name, product.variant, product.description, + storeId, categoryId, product.dutchieProductId, product.name, slug, product.variant, product.description, product.price, product.strainType, product.thcPercentage, product.cbdPercentage, product.brand, product.weight, product.imageUrl, product.dutchieUrl, JSON.stringify(product.metadata) diff --git a/backend/src/utils/age-gate.ts b/backend/src/utils/age-gate.ts index c9f881e3..b81f2eb7 100644 --- a/backend/src/utils/age-gate.ts +++ b/backend/src/utils/age-gate.ts @@ -175,8 +175,9 @@ export async function bypassAgeGate(page: Page, state: string = 'Arizona', useSa }, state); // Try Method 2: State button/card (click state, then click confirm) + let stateClicked = false; if (!selectFound) { - const stateClicked = await page.evaluate((selectedState) => { + stateClicked = await page.evaluate((selectedState) => { const allElements = Array.from(document.querySelectorAll('button, a, div, span, [role="button"], [class*="state"], [class*="State"], [class*="card"], [class*="option"]')); const stateButton = allElements.find(el => el.textContent?.toLowerCase().includes(selectedState.toLowerCase()) diff --git a/backend/src/utils/minio.ts b/backend/src/utils/minio.ts index a4a29d86..240d0200 100755 --- a/backend/src/utils/minio.ts +++ b/backend/src/utils/minio.ts @@ -2,9 +2,19 @@ import * as Minio from 'minio'; import axios from 'axios'; import { v4 as uuidv4 } from 'uuid'; import sharp from 'sharp'; +import * as fs from 'fs/promises'; +import * as path from 'path'; let minioClient: Minio.Client | null = null; +// Check if MinIO is configured +export function isMinioEnabled(): boolean { + return !!process.env.MINIO_ENDPOINT; +} + +// Local storage path for images when MinIO is not configured +const LOCAL_IMAGES_PATH = process.env.LOCAL_IMAGES_PATH || '/app/public/images'; + function getMinioClient(): Minio.Client { if (!minioClient) { minioClient = new Minio.Client({ @@ -21,6 +31,22 @@ function getMinioClient(): Minio.Client { const BUCKET_NAME = process.env.MINIO_BUCKET || 'dutchie'; export async function initializeMinio() { + // Skip MinIO initialization if not configured + if (!isMinioEnabled()) { + console.log('ℹ️ MinIO not configured (MINIO_ENDPOINT not set), using local filesystem storage'); + + // Ensure local images directory exists + try { + await fs.mkdir(LOCAL_IMAGES_PATH, { recursive: true }); + await fs.mkdir(path.join(LOCAL_IMAGES_PATH, 'products'), { recursive: true }); + console.log(`✅ Local images directory ready: ${LOCAL_IMAGES_PATH}`); + } catch (error) { + console.error('❌ Failed to create local images directory:', error); + throw error; + } + return; + } + try { const client = getMinioClient(); // Check if bucket exists @@ -94,9 +120,61 @@ async function removeBackground(buffer: Buffer): Promise { } } +async function uploadToLocalFilesystem( + thumbnailBuffer: Buffer, + mediumBuffer: Buffer, + fullBuffer: Buffer, + baseFilename: string +): Promise { + const thumbnailPath = `${baseFilename}-thumb.png`; + const mediumPath = `${baseFilename}-medium.png`; + const fullPath = `${baseFilename}-full.png`; + + await Promise.all([ + fs.writeFile(path.join(LOCAL_IMAGES_PATH, thumbnailPath), thumbnailBuffer), + fs.writeFile(path.join(LOCAL_IMAGES_PATH, mediumPath), mediumBuffer), + fs.writeFile(path.join(LOCAL_IMAGES_PATH, fullPath), fullBuffer), + ]); + + return { + thumbnail: thumbnailPath, + medium: mediumPath, + full: fullPath, + }; +} + +async function uploadToMinio( + thumbnailBuffer: Buffer, + mediumBuffer: Buffer, + fullBuffer: Buffer, + baseFilename: string +): Promise { + const client = getMinioClient(); + const thumbnailPath = `${baseFilename}-thumb.png`; + const mediumPath = `${baseFilename}-medium.png`; + const fullPath = `${baseFilename}-full.png`; + + await Promise.all([ + client.putObject(BUCKET_NAME, thumbnailPath, thumbnailBuffer, thumbnailBuffer.length, { + 'Content-Type': 'image/png', + }), + client.putObject(BUCKET_NAME, mediumPath, mediumBuffer, mediumBuffer.length, { + 'Content-Type': 'image/png', + }), + client.putObject(BUCKET_NAME, fullPath, fullBuffer, fullBuffer.length, { + 'Content-Type': 'image/png', + }), + ]); + + return { + thumbnail: thumbnailPath, + medium: mediumPath, + full: fullPath, + }; +} + export async function uploadImageFromUrl(imageUrl: string, productId: number, removeBackgrounds = true): Promise { try { - const client = getMinioClient(); // Download image const response = await axios.get(imageUrl, { responseType: 'arraybuffer' }); let buffer = Buffer.from(response.data); @@ -131,47 +209,44 @@ export async function uploadImageFromUrl(imageUrl: string, productId: number, re .toBuffer(), ]); - // Upload all sizes to Minio - const thumbnailPath = `${baseFilename}-thumb.png`; - const mediumPath = `${baseFilename}-medium.png`; - const fullPath = `${baseFilename}-full.png`; - - await Promise.all([ - client.putObject(BUCKET_NAME, thumbnailPath, thumbnailBuffer, thumbnailBuffer.length, { - 'Content-Type': 'image/png', - }), - client.putObject(BUCKET_NAME, mediumPath, mediumBuffer, mediumBuffer.length, { - 'Content-Type': 'image/png', - }), - client.putObject(BUCKET_NAME, fullPath, fullBuffer, fullBuffer.length, { - 'Content-Type': 'image/png', - }), - ]); + // Upload to appropriate storage backend + let result: ImageSizes; + if (isMinioEnabled()) { + result = await uploadToMinio(thumbnailBuffer, mediumBuffer, fullBuffer, baseFilename); + } else { + result = await uploadToLocalFilesystem(thumbnailBuffer, mediumBuffer, fullBuffer, baseFilename); + } console.log(`✅ Uploaded 3 sizes for product ${productId}: ${thumbnailBuffer.length + mediumBuffer.length + fullBuffer.length} bytes total`); - // Return all paths - return { - thumbnail: thumbnailPath, - medium: mediumPath, - full: fullPath, - }; + return result; } catch (error) { console.error('Error uploading image:', error); throw error; } } -export function getImageUrl(path: string): string { - // Use localhost:9020 for browser access since Minio is exposed on host port 9020 - const endpoint = process.env.MINIO_PUBLIC_ENDPOINT || 'http://localhost:9020'; - return `${endpoint}/${BUCKET_NAME}/${path}`; +export function getImageUrl(imagePath: string): string { + if (isMinioEnabled()) { + // Use MinIO endpoint for browser access + const endpoint = process.env.MINIO_PUBLIC_ENDPOINT || 'http://localhost:9020'; + return `${endpoint}/${BUCKET_NAME}/${imagePath}`; + } else { + // Use local path - served via Express static middleware + const publicUrl = process.env.PUBLIC_URL || ''; + return `${publicUrl}/images/${imagePath}`; + } } -export async function deleteImage(path: string): Promise { +export async function deleteImage(imagePath: string): Promise { try { - const client = getMinioClient(); - await client.removeObject(BUCKET_NAME, path); + if (isMinioEnabled()) { + const client = getMinioClient(); + await client.removeObject(BUCKET_NAME, imagePath); + } else { + const fullPath = path.join(LOCAL_IMAGES_PATH, imagePath); + await fs.unlink(fullPath); + } } catch (error) { console.error('Error deleting image:', error); } diff --git a/backend/tsconfig.json b/backend/tsconfig.json index 0391a5f2..2174cf8a 100755 --- a/backend/tsconfig.json +++ b/backend/tsconfig.json @@ -2,7 +2,7 @@ "compilerOptions": { "target": "ES2022", "module": "commonjs", - "lib": ["ES2022"], + "lib": ["ES2022", "dom"], "outDir": "./dist", "rootDir": "./src", "strict": true, diff --git a/frontend/Dockerfile b/frontend/Dockerfile new file mode 100644 index 00000000..d6496369 --- /dev/null +++ b/frontend/Dockerfile @@ -0,0 +1,52 @@ +# Build stage +FROM node:20-slim AS builder + +WORKDIR /app + +# Copy package files +COPY package*.json ./ + +# Install dependencies +RUN npm ci + +# Copy source files +COPY . . + +# Set build-time environment variable for API URL +ENV VITE_API_URL=https://dispos.crawlsy.com + +# Build the app +RUN npm run build + +# Production stage +FROM nginx:alpine + +# Copy built assets from builder stage +COPY --from=builder /app/dist /usr/share/nginx/html + +# Copy custom nginx config for SPA routing +RUN echo 'server { \ + listen 80; \ + server_name _; \ + root /usr/share/nginx/html; \ + index index.html; \ + \ + # Gzip compression \ + gzip on; \ + gzip_types text/plain text/css application/json application/javascript text/xml application/xml application/xml+rss text/javascript; \ + \ + # Cache static assets \ + location ~* \.(js|css|png|jpg|jpeg|gif|ico|svg|woff|woff2|ttf|eot)$ { \ + expires 1y; \ + add_header Cache-Control "public, immutable"; \ + } \ + \ + # SPA fallback - serve index.html for all routes \ + location / { \ + try_files $uri $uri/ /index.html; \ + } \ +}' > /etc/nginx/conf.d/default.conf + +EXPOSE 80 + +CMD ["nginx", "-g", "daemon off;"] diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx index 8e03a5a2..6fd9a8c7 100755 --- a/frontend/src/App.tsx +++ b/frontend/src/App.tsx @@ -2,6 +2,7 @@ import { BrowserRouter, Routes, Route, Navigate } from 'react-router-dom'; import { Login } from './pages/Login'; import { Dashboard } from './pages/Dashboard'; import { Products } from './pages/Products'; +import { ProductDetail } from './pages/ProductDetail'; import { Stores } from './pages/Stores'; import { Dispensaries } from './pages/Dispensaries'; import { DispensaryDetail } from './pages/DispensaryDetail'; @@ -27,6 +28,7 @@ export default function App() { } /> } /> } /> + } /> } /> } /> } /> diff --git a/frontend/src/pages/ProductDetail.tsx b/frontend/src/pages/ProductDetail.tsx new file mode 100644 index 00000000..4fd9aee4 --- /dev/null +++ b/frontend/src/pages/ProductDetail.tsx @@ -0,0 +1,269 @@ +import { useEffect, useState } from 'react'; +import { useParams, useNavigate } from 'react-router-dom'; +import { Layout } from '../components/Layout'; +import { api } from '../lib/api'; +import { ArrowLeft, ExternalLink, Package } from 'lucide-react'; + +export function ProductDetail() { + const { id } = useParams(); + const navigate = useNavigate(); + const [product, setProduct] = useState(null); + const [loading, setLoading] = useState(true); + const [error, setError] = useState(null); + + useEffect(() => { + loadProduct(); + }, [id]); + + const loadProduct = async () => { + if (!id) return; + + setLoading(true); + setError(null); + + try { + const data = await api.getProduct(parseInt(id)); + setProduct(data.product); + } catch (err: any) { + setError(err.message || 'Failed to load product'); + } finally { + setLoading(false); + } + }; + + if (loading) { + return ( + +
+
+
+
+ ); + } + + if (error || !product) { + return ( + +
+ +

Product not found

+

{error}

+ +
+
+ ); + } + + const metadata = product.metadata || {}; + + const getImageUrl = () => { + if (product.image_url_full) return product.image_url_full; + if (product.medium_path) return `http://localhost:9020/dutchie/${product.medium_path}`; + if (product.thumbnail_path) return `http://localhost:9020/dutchie/${product.thumbnail_path}`; + return null; + }; + + const imageUrl = getImageUrl(); + + return ( + +
+ {/* Back button */} + + +
+
+ {/* Product Image */} +
+ {imageUrl ? ( + {product.name} + ) : ( +
+ +
+ )} +
+ + {/* Product Info */} +
+ {/* Header */} +
+
+ {product.in_stock ? ( + + In Stock + + ) : ( + + Out of Stock + + )} + {product.strain_type && ( + + {product.strain_type} + + )} +
+ +

{product.name}

+ + {product.brand && ( +

{product.brand}

+ )} + +
+ {product.store_name && {product.store_name}} + {product.category_name && ( + <> + + {product.category_name} + + )} +
+
+ + {/* Price */} + {product.price !== null && ( +
+
+ ${parseFloat(product.price).toFixed(2)} +
+ {product.weight && ( +
+ {product.weight} +
+ )} +
+ )} + + {/* THC/CBD */} + {(product.thc_percentage || product.cbd_percentage) && ( +
+

Cannabinoid Content

+
+ {product.thc_percentage !== null && ( +
+
THC
+
{product.thc_percentage}%
+
+ )} + {product.cbd_percentage !== null && ( +
+
CBD
+
{product.cbd_percentage}%
+
+ )} +
+
+ )} + + {/* Description */} + {product.description && ( +
+

Description

+

{product.description}

+
+ )} + + {/* Terpenes */} + {metadata.terpenes && metadata.terpenes.length > 0 && ( +
+

Terpenes

+
+ {metadata.terpenes.map((terp: string) => ( + + {terp} + + ))} +
+
+ )} + + {/* Effects */} + {metadata.effects && metadata.effects.length > 0 && ( +
+

Effects

+
+ {metadata.effects.map((effect: string) => ( + + {effect} + + ))} +
+
+ )} + + {/* Flavors */} + {metadata.flavors && metadata.flavors.length > 0 && ( +
+

Flavors

+
+ {metadata.flavors.map((flavor: string) => ( + + {flavor} + + ))} +
+
+ )} + + {/* Lineage */} + {metadata.lineage && ( +
+

Lineage

+

{metadata.lineage}

+
+ )} + + {/* View on Dutchie link */} + {product.dutchie_url && ( + + )} + + {/* Last updated */} + {product.last_seen_at && ( +
+ Last updated: {new Date(product.last_seen_at).toLocaleString()} +
+ )} +
+
+
+
+
+ ); +} diff --git a/frontend/src/pages/Products.tsx b/frontend/src/pages/Products.tsx index e4c37ce5..f1089e4a 100755 --- a/frontend/src/pages/Products.tsx +++ b/frontend/src/pages/Products.tsx @@ -1,10 +1,11 @@ import { useEffect, useState } from 'react'; -import { useSearchParams } from 'react-router-dom'; +import { useSearchParams, useNavigate } from 'react-router-dom'; import { Layout } from '../components/Layout'; import { api } from '../lib/api'; export function Products() { const [searchParams, setSearchParams] = useSearchParams(); + const navigate = useNavigate(); const [products, setProducts] = useState([]); const [stores, setStores] = useState([]); const [categories, setCategories] = useState([]); @@ -322,7 +323,7 @@ export function Products() { marginBottom: '20px' }}> {products.map(product => ( - + navigate(`/products/${product.id}`)} /> ))} @@ -391,15 +392,27 @@ export function Products() { ); } -function ProductCard({ product }: { product: any }) { +function ProductCard({ product, onViewDetails }: { product: any; onViewDetails: () => void }) { + const formatDate = (dateStr: string) => { + if (!dateStr) return 'Never'; + const date = new Date(dateStr); + const now = new Date(); + const diffMs = now.getTime() - date.getTime(); + const diffDays = Math.floor(diffMs / (1000 * 60 * 60 * 24)); + + if (diffDays === 0) return 'Today'; + if (diffDays === 1) return 'Yesterday'; + if (diffDays < 7) return `${diffDays} days ago`; + return date.toLocaleDateString(); + }; + return (
e.currentTarget.style.transform = 'translateY(-4px)'} onMouseLeave={(e) => e.currentTarget.style.transform = 'translateY(0)'} @@ -442,7 +455,7 @@ function ProductCard({ product }: { product: any }) { }}> {product.name}
-
+
{product.price ? `$${product.price}` : 'N/A'}
@@ -456,6 +469,62 @@ function ProductCard({ product }: { product: any }) { {product.in_stock ? 'In Stock' : 'Out of Stock'}
+ + {/* Last Updated */} +
+ Last Updated: {formatDate(product.last_seen_at)} +
+ + {/* Action Buttons */} +
+ {product.dutchie_url && ( + e.stopPropagation()} + > + Dutchie + + )} + +
); diff --git a/frontend/src/pages/StoreDetail.tsx b/frontend/src/pages/StoreDetail.tsx index 62462374..74c45ec5 100644 --- a/frontend/src/pages/StoreDetail.tsx +++ b/frontend/src/pages/StoreDetail.tsx @@ -333,6 +333,26 @@ export function StoreDetail() { Updated: {new Date(product.last_seen_at).toLocaleDateString()}

)} + + {/* Action Buttons */} +
+ {product.dutchie_url && ( + + Dutchie + + )} + +
))} diff --git a/frontend/tsconfig.json b/frontend/tsconfig.json index 3934b8f6..17f43b17 100755 --- a/frontend/tsconfig.json +++ b/frontend/tsconfig.json @@ -12,8 +12,8 @@ "noEmit": true, "jsx": "react-jsx", "strict": true, - "noUnusedLocals": true, - "noUnusedParameters": true, + "noUnusedLocals": false, + "noUnusedParameters": false, "noFallthroughCasesInSwitch": true }, "include": ["src"], diff --git a/k8s/configmap.yaml b/k8s/configmap.yaml new file mode 100644 index 00000000..3358dc63 --- /dev/null +++ b/k8s/configmap.yaml @@ -0,0 +1,9 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: scraper-config + namespace: dispensary-scraper +data: + NODE_ENV: "production" + PORT: "3010" + LOG_LEVEL: "info" diff --git a/k8s/frontend.yaml b/k8s/frontend.yaml new file mode 100644 index 00000000..97a98c0d --- /dev/null +++ b/k8s/frontend.yaml @@ -0,0 +1,41 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: frontend + namespace: dispensary-scraper +spec: + replicas: 1 + selector: + matchLabels: + app: frontend + template: + metadata: + labels: + app: frontend + spec: + imagePullSecrets: + - name: regcred + containers: + - name: frontend + image: code.cannabrands.app/creationshop/dispensary-scraper-frontend:latest + ports: + - containerPort: 80 + resources: + requests: + memory: "64Mi" + cpu: "50m" + limits: + memory: "128Mi" + cpu: "100m" +--- +apiVersion: v1 +kind: Service +metadata: + name: frontend + namespace: dispensary-scraper +spec: + selector: + app: frontend + ports: + - port: 80 + targetPort: 80 diff --git a/k8s/ingress.yaml b/k8s/ingress.yaml new file mode 100644 index 00000000..ae977145 --- /dev/null +++ b/k8s/ingress.yaml @@ -0,0 +1,31 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: scraper-ingress + namespace: dispensary-scraper + annotations: + kubernetes.io/ingress.class: nginx + cert-manager.io/cluster-issuer: letsencrypt-prod +spec: + tls: + - hosts: + - dispos.crawlsy.com + secretName: scraper-tls + rules: + - host: dispos.crawlsy.com + http: + paths: + - path: /api + pathType: Prefix + backend: + service: + name: scraper + port: + number: 80 + - path: / + pathType: Prefix + backend: + service: + name: frontend + port: + number: 80 diff --git a/k8s/namespace.yaml b/k8s/namespace.yaml new file mode 100644 index 00000000..839311d3 --- /dev/null +++ b/k8s/namespace.yaml @@ -0,0 +1,6 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: dispensary-scraper + labels: + app: dispensary-scraper diff --git a/k8s/postgres.yaml b/k8s/postgres.yaml new file mode 100644 index 00000000..86a7e3ae --- /dev/null +++ b/k8s/postgres.yaml @@ -0,0 +1,76 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: postgres-pvc + namespace: dispensary-scraper +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 5Gi +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: postgres + namespace: dispensary-scraper +spec: + replicas: 1 + selector: + matchLabels: + app: postgres + template: + metadata: + labels: + app: postgres + spec: + containers: + - name: postgres + image: postgres:15-alpine + ports: + - containerPort: 5432 + env: + - name: POSTGRES_USER + valueFrom: + secretKeyRef: + name: scraper-secrets + key: POSTGRES_USER + - name: POSTGRES_PASSWORD + valueFrom: + secretKeyRef: + name: scraper-secrets + key: POSTGRES_PASSWORD + - name: POSTGRES_DB + valueFrom: + secretKeyRef: + name: scraper-secrets + key: POSTGRES_DB + - name: PGDATA + value: /var/lib/postgresql/data/pgdata + volumeMounts: + - name: postgres-storage + mountPath: /var/lib/postgresql/data + resources: + requests: + memory: "256Mi" + cpu: "250m" + limits: + memory: "512Mi" + cpu: "500m" + volumes: + - name: postgres-storage + persistentVolumeClaim: + claimName: postgres-pvc +--- +apiVersion: v1 +kind: Service +metadata: + name: postgres + namespace: dispensary-scraper +spec: + selector: + app: postgres + ports: + - port: 5432 + targetPort: 5432 diff --git a/k8s/scraper.yaml b/k8s/scraper.yaml new file mode 100644 index 00000000..3e4aaae4 --- /dev/null +++ b/k8s/scraper.yaml @@ -0,0 +1,53 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: scraper-images-pvc + namespace: dispensary-scraper +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 10Gi +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: scraper + namespace: dispensary-scraper +spec: + replicas: 1 + selector: + matchLabels: + app: scraper + template: + metadata: + labels: + app: scraper + spec: + imagePullSecrets: + - name: regcred + containers: + - name: scraper + image: code.cannabrands.app/creationshop/dispensary-scraper:latest + ports: + - containerPort: 3010 + envFrom: + - configMapRef: + name: scraper-config + - secretRef: + name: scraper-secrets + volumeMounts: + - name: images-storage + mountPath: /app/public/images + resources: + requests: + memory: "512Mi" + cpu: "250m" + limits: + memory: "1Gi" + cpu: "1000m" + volumes: + - name: images-storage + persistentVolumeClaim: + claimName: scraper-images-pvc diff --git a/k8s/secrets.yaml b/k8s/secrets.yaml new file mode 100644 index 00000000..cb6987e5 --- /dev/null +++ b/k8s/secrets.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: Secret +metadata: + name: scraper-secrets + namespace: dispensary-scraper +type: Opaque +stringData: + POSTGRES_USER: "scraper" + POSTGRES_PASSWORD: "Kx9$mVnQ2wLpZ4fT8jRbY7cH" + POSTGRES_DB: "dispensary_scraper" + DATABASE_URL: "postgresql://scraper:Kx9$mVnQ2wLpZ4fT8jRbY7cH@postgres:5432/dispensary_scraper" + JWT_SECRET: "aW7vN3xKpM9qLsT2fB5jDc8hR4wY6zXe" diff --git a/k8s/service.yaml b/k8s/service.yaml new file mode 100644 index 00000000..264f8a9e --- /dev/null +++ b/k8s/service.yaml @@ -0,0 +1,11 @@ +apiVersion: v1 +kind: Service +metadata: + name: scraper + namespace: dispensary-scraper +spec: + selector: + app: scraper + ports: + - port: 80 + targetPort: 3010