feat: Parallelized store discovery, modification tracking, and task deduplication
Store Discovery Parallelization:
- Add store_discovery_state handler for per-state parallel discovery
- Add POST /api/tasks/batch/store-discovery endpoint
- 8 workers can now process states in parallel (~30-45 min vs 3+ hours)

Modification Tracking (Migration 090):
- Add last_modified_at, last_modified_by_task, last_modified_task_id to dispensaries
- Add same columns to store_products
- Update all handlers to set tracking info on modifications

Stale Task Recovery:
- Add periodic stale cleanup every 10 minutes (worker-0 only)
- Prevents orphaned tasks from blocking queue after worker crashes

Task Deduplication:
- createStaggeredTasks now skips if pending/active task exists for same role
- Skips if same role completed within last 4 hours
- API responses include skipped count

🤖 Generated with [Claude Code](https://claude.com/claude-code)
This commit is contained in:
@@ -131,6 +131,14 @@ export interface PromotionSummary {
|
||||
newDispensaryIds: number[];
|
||||
}
|
||||
|
||||
/**
|
||||
* Task tracking info for the modification audit trail: identifies which task (by id and role) last modified a record.
|
||||
*/
|
||||
export interface TaskTrackingInfo {
|
||||
taskId: number; // bound to last_modified_task_id in promoteLocation's upsert; a falsy value (0) is stored as NULL there
|
||||
taskRole: string; // bound to last_modified_by_task in promoteLocation's upsert; an empty string is stored as NULL there
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate a URL-safe slug from name and city
|
||||
*/
|
||||
@@ -283,7 +291,8 @@ async function ensureCrawlerProfile(
|
||||
* Idempotent: uses ON CONFLICT on platform_dispensary_id
|
||||
*/
|
||||
async function promoteLocation(
|
||||
loc: DiscoveryLocationRow
|
||||
loc: DiscoveryLocationRow,
|
||||
taskTracking?: TaskTrackingInfo
|
||||
): Promise<PromotionResult> {
|
||||
const slug = loc.platform_slug || generateSlug(loc.name, loc.city || '', loc.state_code || '');
|
||||
|
||||
@@ -325,13 +334,16 @@ async function promoteLocation(
|
||||
dutchie_verified,
|
||||
dutchie_verified_at,
|
||||
dutchie_discovery_id,
|
||||
last_modified_at,
|
||||
last_modified_by_task,
|
||||
last_modified_task_id,
|
||||
created_at,
|
||||
updated_at
|
||||
) VALUES (
|
||||
$1, $2, $3, $4, $5, $6, $7, $8, $9, $10,
|
||||
$11, $12, $13, $14, $15, $16, $17, $18, $19, $20,
|
||||
$21, $22, $23, $24, $25, $26, $27, $28, $29, $30,
|
||||
$31, $32, $33, $34, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP
|
||||
$31, $32, $33, $34, $35, $36, $37, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP
|
||||
)
|
||||
ON CONFLICT (platform_dispensary_id) WHERE platform_dispensary_id IS NOT NULL
|
||||
DO UPDATE SET
|
||||
@@ -362,6 +374,9 @@ async function promoteLocation(
|
||||
country = EXCLUDED.country,
|
||||
status = EXCLUDED.status,
|
||||
dutchie_discovery_id = EXCLUDED.dutchie_discovery_id,
|
||||
last_modified_at = EXCLUDED.last_modified_at,
|
||||
last_modified_by_task = EXCLUDED.last_modified_by_task,
|
||||
last_modified_task_id = EXCLUDED.last_modified_task_id,
|
||||
updated_at = CURRENT_TIMESTAMP
|
||||
RETURNING id, (xmax = 0) AS inserted
|
||||
`, [
|
||||
@@ -399,6 +414,9 @@ async function promoteLocation(
|
||||
true, // $32 dutchie_verified
|
||||
new Date(), // $33 dutchie_verified_at
|
||||
loc.id, // $34 dutchie_discovery_id
|
||||
taskTracking ? new Date() : null, // $35 last_modified_at
|
||||
taskTracking?.taskRole || null, // $36 last_modified_by_task
|
||||
taskTracking?.taskId || null, // $37 last_modified_task_id
|
||||
]);
|
||||
|
||||
const dispensaryId = upsertResult.rows[0].id;
|
||||
@@ -446,10 +464,12 @@ async function promoteLocation(
|
||||
*
|
||||
* @param stateCode Optional filter by state (e.g., 'CA', 'AZ')
|
||||
* @param dryRun If true, only validate without making changes
|
||||
* @param taskTracking Optional task info for modification audit trail
|
||||
*/
|
||||
export async function promoteDiscoveredLocations(
|
||||
stateCode?: string,
|
||||
dryRun = false
|
||||
dryRun = false,
|
||||
taskTracking?: TaskTrackingInfo
|
||||
): Promise<PromotionSummary> {
|
||||
const startTime = Date.now();
|
||||
|
||||
@@ -524,7 +544,7 @@ export async function promoteDiscoveredLocations(
|
||||
}
|
||||
|
||||
try {
|
||||
const promotionResult = await promoteLocation(loc);
|
||||
const promotionResult = await promoteLocation(loc, taskTracking);
|
||||
results.push(promotionResult);
|
||||
|
||||
if (promotionResult.action === 'created') {
|
||||
|
||||
Reference in New Issue
Block a user