Files
cannaiq/backend/migrations/090_modification_tracking.sql
Kelly c62f8cbf06 feat: Parallelized store discovery, modification tracking, and task deduplication
Store Discovery Parallelization:
- Add store_discovery_state handler for per-state parallel discovery
- Add POST /api/tasks/batch/store-discovery endpoint
- 8 workers can now process states in parallel (~30-45 min vs 3+ hours)

Modification Tracking (Migration 090):
- Add last_modified_at, last_modified_by_task, last_modified_task_id to dispensaries
- Add same columns to store_products
- Update all handlers to set tracking info on modifications

Stale Task Recovery:
- Add periodic stale cleanup every 10 minutes (worker-0 only)
- Prevents orphaned tasks from blocking queue after worker crashes

Task Deduplication:
- createStaggeredTasks now skips if pending/active task exists for same role
- Skips if same role completed within last 4 hours
- API responses include skipped count

🤖 Generated with [Claude Code](https://claude.com/claude-code)
2025-12-12 22:15:04 -07:00

67 lines
2.9 KiB
SQL

-- Migration 090: Add modification tracking columns
--
-- Tracks when records were last modified and by which task.
-- Enables debugging, auditing, and understanding data freshness.
--
-- Columns added:
-- last_modified_at - When the record was last modified by a task
-- last_modified_by_task - Which task role modified it (e.g., 'product_refresh')
-- last_modified_task_id - The specific task ID that modified it
-- ============================================================
-- dispensaries table
-- ============================================================
-- Add the three nullable tracking columns in a single ALTER TABLE.
-- Every clause carries its own IF NOT EXISTS, so a column that is already
-- present is skipped individually and the migration can be re-run.
ALTER TABLE dispensaries
    ADD COLUMN IF NOT EXISTS last_modified_at TIMESTAMPTZ,
    ADD COLUMN IF NOT EXISTS last_modified_by_task VARCHAR(50),
    ADD COLUMN IF NOT EXISTS last_modified_task_id INTEGER;

-- Catalog descriptions for the new columns.
COMMENT ON COLUMN dispensaries.last_modified_at IS 'Timestamp when this record was last modified by a task';
COMMENT ON COLUMN dispensaries.last_modified_by_task IS 'Task role that last modified this record (e.g., store_discovery_state, entry_point_discovery)';
COMMENT ON COLUMN dispensaries.last_modified_task_id IS 'ID of the worker_tasks record that last modified this';

-- Partial index ordered newest-first on the modification timestamp;
-- rows whose last_modified_at is NULL are left out of the index.
CREATE INDEX IF NOT EXISTS idx_dispensaries_last_modified
    ON dispensaries (last_modified_at DESC)
    WHERE last_modified_at IS NOT NULL;

-- Partial index for filtering by the task role that made the last change;
-- rows whose last_modified_by_task is NULL are left out of the index.
CREATE INDEX IF NOT EXISTS idx_dispensaries_modified_by_task
    ON dispensaries (last_modified_by_task)
    WHERE last_modified_by_task IS NOT NULL;
-- ============================================================
-- store_products table
-- ============================================================
-- Add the three nullable tracking columns in a single ALTER TABLE.
-- Every clause carries its own IF NOT EXISTS, so a column that is already
-- present is skipped individually and the migration can be re-run.
ALTER TABLE store_products
    ADD COLUMN IF NOT EXISTS last_modified_at TIMESTAMPTZ,
    ADD COLUMN IF NOT EXISTS last_modified_by_task VARCHAR(50),
    ADD COLUMN IF NOT EXISTS last_modified_task_id INTEGER;

-- Catalog descriptions for the new columns.
COMMENT ON COLUMN store_products.last_modified_at IS 'Timestamp when this record was last modified by a task';
COMMENT ON COLUMN store_products.last_modified_by_task IS 'Task role that last modified this record (e.g., product_refresh, product_discovery)';
COMMENT ON COLUMN store_products.last_modified_task_id IS 'ID of the worker_tasks record that last modified this';

-- Partial index ordered newest-first on the modification timestamp;
-- rows whose last_modified_at is NULL are left out of the index.
CREATE INDEX IF NOT EXISTS idx_store_products_last_modified
    ON store_products (last_modified_at DESC)
    WHERE last_modified_at IS NOT NULL;

-- Partial index for filtering by the task role that made the last change;
-- rows whose last_modified_by_task is NULL are left out of the index.
CREATE INDEX IF NOT EXISTS idx_store_products_modified_by_task
    ON store_products (last_modified_by_task)
    WHERE last_modified_by_task IS NOT NULL;

-- Composite partial index keyed on (dispensary_id, last_modified_at DESC):
-- it serves "most recently modified products of one dispensary" lookups.
-- NOTE: despite the "task" in its name, this index contains no task column;
-- task-role filtering is covered by idx_store_products_modified_by_task.
-- The name is kept unchanged so databases that already ran this migration
-- stay consistent with it.
CREATE INDEX IF NOT EXISTS idx_store_products_task_modified
    ON store_products (dispensary_id, last_modified_at DESC)
    WHERE last_modified_at IS NOT NULL;