feat: Parallelized store discovery, modification tracking, and task deduplication

Store Discovery Parallelization:
- Add store_discovery_state handler for per-state parallel discovery
- Add POST /api/tasks/batch/store-discovery endpoint
- 8 workers can now process states in parallel (~30-45 min vs 3+ hours)

Modification Tracking (Migration 090):
- Add last_modified_at, last_modified_by_task, last_modified_task_id to dispensaries
- Add same columns to store_products
- Update all handlers to set tracking info on modifications

Stale Task Recovery:
- Add periodic stale-task cleanup every 10 minutes (run by worker-0 only)
- Prevents orphaned tasks from blocking queue after worker crashes

Task Deduplication:
- createStaggeredTasks now skips task creation if a pending/active task already exists for the same role
- Also skips if a task for the same role completed within the last 4 hours
- API responses include skipped count

🤖 Generated with [Claude Code](https://claude.com/claude-code)
This commit is contained in:
Kelly
2025-12-12 22:15:04 -07:00
parent e4e8438d8b
commit c62f8cbf06
11 changed files with 815 additions and 51 deletions

View File

@@ -131,6 +131,14 @@ export interface PromotionSummary {
newDispensaryIds: number[];
}
/**
* Task tracking info for modification audit trail.
*
* Optional argument to promoteLocation / promoteDiscoveredLocations. When it
* is supplied, the upsert records the current time in last_modified_at and
* stores the two fields below; when it is omitted, all three
* last_modified_* parameters are passed as null.
*/
export interface TaskTrackingInfo {
// Bound to the last_modified_task_id column of the upserted row.
taskId: number;
// Bound to the last_modified_by_task column of the upserted row.
taskRole: string;
}
/**
* Generate a URL-safe slug from name and city
*/
@@ -283,7 +291,8 @@ async function ensureCrawlerProfile(
* Idempotent: uses ON CONFLICT on platform_dispensary_id
*/
async function promoteLocation(
loc: DiscoveryLocationRow
loc: DiscoveryLocationRow,
taskTracking?: TaskTrackingInfo
): Promise<PromotionResult> {
const slug = loc.platform_slug || generateSlug(loc.name, loc.city || '', loc.state_code || '');
@@ -325,13 +334,16 @@ async function promoteLocation(
dutchie_verified,
dutchie_verified_at,
dutchie_discovery_id,
last_modified_at,
last_modified_by_task,
last_modified_task_id,
created_at,
updated_at
) VALUES (
$1, $2, $3, $4, $5, $6, $7, $8, $9, $10,
$11, $12, $13, $14, $15, $16, $17, $18, $19, $20,
$21, $22, $23, $24, $25, $26, $27, $28, $29, $30,
$31, $32, $33, $34, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP
$31, $32, $33, $34, $35, $36, $37, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP
)
ON CONFLICT (platform_dispensary_id) WHERE platform_dispensary_id IS NOT NULL
DO UPDATE SET
@@ -362,6 +374,9 @@ async function promoteLocation(
country = EXCLUDED.country,
status = EXCLUDED.status,
dutchie_discovery_id = EXCLUDED.dutchie_discovery_id,
last_modified_at = EXCLUDED.last_modified_at,
last_modified_by_task = EXCLUDED.last_modified_by_task,
last_modified_task_id = EXCLUDED.last_modified_task_id,
updated_at = CURRENT_TIMESTAMP
RETURNING id, (xmax = 0) AS inserted
`, [
@@ -399,6 +414,9 @@ async function promoteLocation(
true, // $32 dutchie_verified
new Date(), // $33 dutchie_verified_at
loc.id, // $34 dutchie_discovery_id
taskTracking ? new Date() : null, // $35 last_modified_at
taskTracking?.taskRole || null, // $36 last_modified_by_task
taskTracking?.taskId || null, // $37 last_modified_task_id
]);
const dispensaryId = upsertResult.rows[0].id;
@@ -446,10 +464,12 @@ async function promoteLocation(
*
* @param stateCode Optional filter by state (e.g., 'CA', 'AZ')
* @param dryRun If true, only validate without making changes
* @param taskTracking Optional task info for modification audit trail
*/
export async function promoteDiscoveredLocations(
stateCode?: string,
dryRun = false
dryRun = false,
taskTracking?: TaskTrackingInfo
): Promise<PromotionSummary> {
const startTime = Date.now();
@@ -524,7 +544,7 @@ export async function promoteDiscoveredLocations(
}
try {
const promotionResult = await promoteLocation(loc);
const promotionResult = await promoteLocation(loc, taskTracking);
results.push(promotionResult);
if (promotionResult.action === 'created') {