Major changes:
- Split crawl into payload_fetch (API → disk) and product_refresh (disk → DB)
- Add task chaining: store_discovery → product_discovery → payload_fetch → product_refresh
- Add payload storage utilities for gzipped JSON on filesystem
- Add /api/payloads endpoints for payload access and diffing
- Add DB-driven TaskScheduler with schedule persistence
- Track newDispensaryIds through discovery promotion for chaining
- Add stealth improvements: HTTP fingerprinting, proxy rotation enhancements
- Add Workers dashboard K8s scaling controls

New files:
- src/tasks/handlers/payload-fetch.ts - Fetches from API, saves to disk
- src/services/task-scheduler.ts - DB-driven schedule management
- src/utils/payload-storage.ts - Payload save/load utilities
- src/routes/payloads.ts - Payload API endpoints
- src/services/http-fingerprint.ts - Browser fingerprint generation
- docs/TASK_WORKFLOW_2024-12-10.md - Complete workflow documentation

Migrations:
- 078: Proxy consecutive 403 tracking
- 079: task_schedules table
- 080: raw_crawl_payloads table
- 081: payload column and last_fetch_at

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
69 lines
2.4 KiB
JSON
Executable File
69 lines
2.4 KiB
JSON
Executable File
{
|
|
"name": "dutchie-menus-backend",
|
|
"version": "1.6.0",
|
|
"description": "Backend API for Dutchie Menus scraper and management",
|
|
"main": "dist/index.js",
|
|
"scripts": {
|
|
"dev": "tsx watch src/index.ts",
|
|
"dev:worker": "tsx watch src/cli.ts --worker",
|
|
"build": "tsc",
|
|
"start": "node dist/index.js",
|
|
"start:worker": "node dist/cli.js --worker",
|
|
"worker": "tsx src/cli.ts --worker",
|
|
"migrate": "tsx src/db/migrate.ts",
|
|
"seed": "tsx src/db/seed.ts",
|
|
"migrate:az": "tsx src/dutchie-az/db/migrate.ts",
|
|
"health:az": "tsx -e \"import { healthCheck } from './src/dutchie-az/db/connection'; (async()=>{ const ok=await healthCheck(); console.log(ok?'AZ DB healthy':'AZ DB NOT reachable'); process.exit(ok?0:1); })();\"",
|
|
"system:smoke-test": "tsx src/scripts/system-smoke-test.ts",
|
|
"discovery:dt:cities:auto": "tsx src/dutchie-az/discovery/discovery-dt-cities-auto.ts",
|
|
"discovery:dt:cities:manual": "tsx src/dutchie-az/discovery/discovery-dt-cities-manual-seed.ts",
|
|
"discovery:dt:locations": "tsx src/dutchie-az/discovery/discovery-dt-locations-from-cities.ts",
|
|
"backfill:legacy:canonical": "tsx src/scripts/backfill-legacy-to-canonical.ts",
|
|
"seed:dt:cities:bulk": "tsx src/scripts/seed-dt-cities-bulk.ts"
|
|
},
|
|
"dependencies": {
|
|
"@kubernetes/client-node": "^1.4.0",
|
|
"@types/bcryptjs": "^3.0.0",
|
|
"axios": "^1.6.2",
|
|
"bcrypt": "^5.1.1",
|
|
"bcryptjs": "^3.0.3",
|
|
"bullmq": "^5.65.1",
|
|
"cheerio": "^1.1.2",
|
|
"cors": "^2.8.5",
|
|
"dotenv": "^16.3.1",
|
|
"express": "^4.18.2",
|
|
"express-rate-limit": "^7.1.5",
|
|
"helmet": "^7.1.0",
|
|
"https-proxy-agent": "^7.0.2",
|
|
"ioredis": "^5.8.2",
|
|
"ip2location-nodejs": "^9.7.0",
|
|
"ipaddr.js": "^2.2.0",
|
|
"jsonwebtoken": "^9.0.2",
|
|
"minio": "^7.1.3",
|
|
"node-cron": "^3.0.3",
|
|
"pg": "^8.11.3",
|
|
"playwright": "^1.56.1",
|
|
"playwright-extra": "^4.3.6",
|
|
"puppeteer": "^21.0.0",
|
|
"puppeteer-extra": "^3.3.6",
|
|
"puppeteer-extra-plugin-stealth": "^2.11.2",
|
|
"sharp": "^0.32.0",
|
|
"socks-proxy-agent": "^8.0.2",
|
|
"user-agents": "^1.1.669",
|
|
"uuid": "^9.0.1",
|
|
"zod": "^3.22.4"
|
|
},
|
|
"devDependencies": {
|
|
"@types/bcrypt": "^5.0.2",
|
|
"@types/cors": "^2.8.17",
|
|
"@types/express": "^4.17.21",
|
|
"@types/jsonwebtoken": "^9.0.5",
|
|
"@types/node": "^20.10.5",
|
|
"@types/node-cron": "^3.0.11",
|
|
"@types/pg": "^8.15.6",
|
|
"@types/uuid": "^9.0.7",
|
|
"tsx": "^4.7.0",
|
|
"typescript": "^5.3.3"
|
|
}
|
|
}
|