Major changes:
- Split crawl into payload_fetch (API → disk) and product_refresh (disk → DB)
- Add task chaining: store_discovery → product_discovery → payload_fetch → product_refresh
- Add payload storage utilities for gzipped JSON on filesystem
- Add /api/payloads endpoints for payload access and diffing
- Add DB-driven TaskScheduler with schedule persistence
- Track newDispensaryIds through discovery promotion for chaining
- Add stealth improvements: HTTP fingerprinting, proxy rotation enhancements
- Add Workers dashboard K8s scaling controls

New files:
- src/tasks/handlers/payload-fetch.ts - Fetches from API, saves to disk
- src/services/task-scheduler.ts - DB-driven schedule management
- src/utils/payload-storage.ts - Payload save/load utilities
- src/routes/payloads.ts - Payload API endpoints
- src/services/http-fingerprint.ts - Browser fingerprint generation
- docs/TASK_WORKFLOW_2024-12-10.md - Complete workflow documentation

Migrations:
- 078: Proxy consecutive 403 tracking
- 079: task_schedules table
- 080: raw_crawl_payloads table
- 081: payload column and last_fetch_at

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
53 lines
1.1 KiB
TypeScript
53 lines
1.1 KiB
TypeScript
/**
 * Dutchie Platform Module
 *
 * Single export point for all Dutchie communication.
 * All Dutchie workers MUST import from this module.
 */
|
|
|
|
export {
  // HTTP client (per workflow-12102025.md: uses curl-impersonate + ordered headers)
  curlPost,
  curlGet,
  executeGraphQL,
  fetchPage,
  extractNextData,

  // Header construction (per workflow-12102025.md: browser-specific ordering)
  buildHeaders,

  // Session management (per workflow-12102025.md: menuUrl for dynamic Referer)
  startSession,
  endSession,
  getCurrentSession,

  // Proxy
  setProxy,
  getProxy,
  setCrawlRotator,
  getCrawlRotator,

  // Configuration
  DUTCHIE_CONFIG,
  GRAPHQL_HASHES,
} from './client';

// Type-only re-exports from the same client module; these are erased at
// compile time and add nothing to the emitted JavaScript.
export type {
  CurlResponse,
  Fingerprint,
  CrawlSession,
  ExecuteGraphQLOptions,
  FetchPageOptions,
} from './client';
|
|
|
|
// CrawlRotator types are re-exported here from their canonical location
export type {
  CrawlRotator,
  Proxy,
  ProxyStats,
} from '../../services/crawl-rotator';
|
|
|
|
// GraphQL queries
export {
  resolveDispensaryId,
  resolveDispensaryIdWithDetails,
  getDispensaryInfo,
} from './queries';

// Types exported alongside the GraphQL queries (type-only, erased at compile time)
export type { ResolveDispensaryResult, DispensaryInfo } from './queries';
|