feat(tasks): Refactor task workflow with payload/refresh separation
Major changes:
- Split crawl into payload_fetch (API → disk) and product_refresh (disk → DB)
- Add task chaining: store_discovery → product_discovery → payload_fetch → product_refresh
- Add payload storage utilities for gzipped JSON on filesystem
- Add /api/payloads endpoints for payload access and diffing
- Add DB-driven TaskScheduler with schedule persistence
- Track newDispensaryIds through discovery promotion for chaining
- Add stealth improvements: HTTP fingerprinting, proxy rotation enhancements
- Add Workers dashboard K8s scaling controls

New files:
- src/tasks/handlers/payload-fetch.ts - Fetches from API, saves to disk
- src/services/task-scheduler.ts - DB-driven schedule management
- src/utils/payload-storage.ts - Payload save/load utilities
- src/routes/payloads.ts - Payload API endpoints
- src/services/http-fingerprint.ts - Browser fingerprint generation
- docs/TASK_WORKFLOW_2024-12-10.md - Complete workflow documentation

Migrations:
- 078: Proxy consecutive 403 tracking
- 079: task_schedules table
- 080: raw_crawl_payloads table
- 081: payload column and last_fetch_at

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -127,6 +127,8 @@ export interface PromotionSummary {
|
||||
errors: string[];
|
||||
}>;
|
||||
durationMs: number;
|
||||
// Per TASK_WORKFLOW_2024-12-10.md: Track new dispensary IDs for task chaining
|
||||
newDispensaryIds: number[];
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -469,6 +471,8 @@ export async function promoteDiscoveredLocations(
|
||||
|
||||
const results: PromotionResult[] = [];
|
||||
const rejectedRecords: PromotionSummary['rejectedRecords'] = [];
|
||||
// Per TASK_WORKFLOW_2024-12-10.md: Track new dispensary IDs for task chaining
|
||||
const newDispensaryIds: number[] = [];
|
||||
let created = 0;
|
||||
let updated = 0;
|
||||
let skipped = 0;
|
||||
@@ -525,6 +529,8 @@ export async function promoteDiscoveredLocations(
|
||||
|
||||
if (promotionResult.action === 'created') {
|
||||
created++;
|
||||
// Per TASK_WORKFLOW_2024-12-10.md: Track new IDs for task chaining
|
||||
newDispensaryIds.push(promotionResult.dispensaryId);
|
||||
} else {
|
||||
updated++;
|
||||
}
|
||||
@@ -548,6 +554,8 @@ export async function promoteDiscoveredLocations(
|
||||
results,
|
||||
rejectedRecords,
|
||||
durationMs: Date.now() - startTime,
|
||||
// Per TASK_WORKFLOW_2024-12-10.md: Return new IDs for task chaining
|
||||
newDispensaryIds,
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user