Files
cannaiq/backend/src/tasks/handlers/product-discovery-jane.ts
Kelly 9518ca48a5 feat(tasks): Task tracking, IP-per-store, and schedule edit fixes
- Add task completion verification with DB and output layers
- Add reconciliation loop to sync worker memory with DB state
- Implement IP-per-store-per-platform conflict detection
- Add task ID hash to MinIO payload filenames for traceability
- Fix schedule edit modal with dispensary info in API responses
- Add task ID display after dispensary name in worker dashboard
- Add migrations for proxy_ip and source tracking columns

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-14 10:49:21 -07:00

168 lines
4.9 KiB
TypeScript

/**
* Jane Product Discovery Handler
*
* Fetches all products from a Jane store via Puppeteer + network interception.
*
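* Flow:
* 1. Load dispensary with platform_dispensary_id
* 2. Fetch the store's Algolia product hits by Jane store ID (platform_dispensary_id)
*    via fetchProductsByStoreIdDirect
* 3. Save raw payload via saveRawPayload
* 4. Mark the dispensary as 'hydrating' and update its success/failure counters
* 5. Queue product_refresh task for normalization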
*/
import { TaskContext, TaskResult } from '../task-worker';
import {
setCrawlRotator,
fetchProductsByStoreIdDirect,
} from '../../platforms/jane';
import { saveRawPayload } from '../../utils/payload-storage';
import { taskService } from '../task-service';
/**
 * Runs product discovery for a single Jane-backed dispensary.
 *
 * Steps, in order:
 *  1. Load the dispensary row identified by `task.dispensary_id`.
 *  2. Fetch the store's products with `fetchProductsByStoreIdDirect`, keyed by
 *     the row's `platform_dispensary_id`.
 *  3. Persist the raw hits through `saveRawPayload`.
 *  4. Move the dispensary to the `hydrating` stage and reset its failure counter.
 *  5. Queue a `product_refresh` task that references the stored payload.
 *
 * Failures are reported through the returned result (`success: false` plus an
 * `error` message) rather than thrown. Failure bookkeeping on the dispensary
 * row is best-effort: if that UPDATE itself fails, the problem is logged and
 * the original failure reason is still returned.
 *
 * @param ctx - Task context; `pool`, `task` and `crawlRotator` are read from it.
 * @returns On success: product count, payload id/size and store summary.
 *          On failure: `success: false` with a human-readable `error`.
 */
export async function handleProductDiscoveryJane(ctx: TaskContext): Promise<TaskResult> {
  const { pool, task, crawlRotator } = ctx;
  const dispensaryId = task.dispensary_id;

  if (!dispensaryId) {
    return {
      success: false,
      error: 'Missing dispensary_id in task',
    };
  }

  /**
   * Best-effort failure bookkeeping on the dispensary row.
   *
   * Always bumps `consecutive_failures`. When `promoteToFailing` is true the
   * row is additionally moved to the `failing` stage once the counter is
   * already at 2 or more. In a SQL UPDATE the right-hand side sees the
   * pre-update value, so that is the third consecutive failure or later.
   *
   * Never rejects: a bookkeeping error must not mask the failure that is
   * being recorded, so it is logged instead of propagated.
   */
  const recordFailure = async (promoteToFailing: boolean): Promise<void> => {
    const sql = promoteToFailing
      ? `UPDATE dispensaries
         SET consecutive_failures = consecutive_failures + 1,
             stage = CASE WHEN consecutive_failures >= 2 THEN 'failing' ELSE stage END,
             updated_at = NOW()
         WHERE id = $1`
      : `UPDATE dispensaries
         SET consecutive_failures = consecutive_failures + 1,
             updated_at = NOW()
         WHERE id = $1`;

    try {
      await pool.query(sql, [dispensaryId]);
    } catch (bookkeepingError: unknown) {
      const detail =
        bookkeepingError instanceof Error ? bookkeepingError.message : String(bookkeepingError);
      console.error(
        `[JaneProductDiscovery] Could not record failure for dispensary ${dispensaryId}:`,
        detail
      );
    }
  };

  console.log(`[JaneProductDiscovery] Starting for dispensary ${dispensaryId}`);

  try {
    // Load dispensary
    const dispResult = await pool.query(
      `SELECT id, name, menu_url, platform_dispensary_id, menu_type
       FROM dispensaries WHERE id = $1`,
      [dispensaryId]
    );

    if (dispResult.rows.length === 0) {
      return {
        success: false,
        error: `Dispensary ${dispensaryId} not found`,
      };
    }

    const dispensary = dispResult.rows[0];

    if (!dispensary.platform_dispensary_id) {
      return {
        success: false,
        error: `Dispensary ${dispensaryId} has no platform_dispensary_id (Jane store ID)`,
      };
    }

    console.log(`[JaneProductDiscovery] Fetching products for Jane store ${dispensary.platform_dispensary_id}`);

    // Hand the worker's rotator to the Jane platform module before fetching.
    if (crawlRotator) {
      setCrawlRotator(crawlRotator);
    }

    // Fetch products directly via Algolia
    const result = await fetchProductsByStoreIdDirect(dispensary.platform_dispensary_id);

    if (result.products.length === 0) {
      console.warn(`[JaneProductDiscovery] No products captured for dispensary ${dispensaryId}`);

      // Count the failure, but keep the current stage. Only the exception
      // path below promotes the dispensary to 'failing'.
      await recordFailure(false);

      return {
        success: false,
        error: 'No products captured from Jane menu page',
        productCount: 0,
      };
    }

    console.log(`[JaneProductDiscovery] Captured ${result.products.length} products`);

    // Store the raw Algolia hits untouched; normalization happens later in
    // the product_refresh task.
    const rawPayload = {
      hits: result.products.map(p => p.raw),
      store: result.store?.raw ?? null,
      capturedAt: new Date().toISOString(),
      platform: 'jane',
      dispensaryId,
      storeId: dispensary.platform_dispensary_id,
    };

    const { id: payloadId, sizeBytes } = await saveRawPayload(
      pool,
      dispensaryId,
      rawPayload,
      null, // crawl_run_id
      result.products.length,
      'jane', // platform
      task.id // task ID for traceability
    );
    const payloadSizeKB = Math.round(sizeBytes / 1024);

    console.log(`[JaneProductDiscovery] Saved payload ${payloadId} (${payloadSizeKB}KB)`);

    // Update dispensary stage and timestamps
    await pool.query(
      `UPDATE dispensaries
       SET stage = 'hydrating',
           last_fetch_at = NOW(),
           consecutive_successes = consecutive_successes + 1,
           consecutive_failures = 0,
           updated_at = NOW()
       WHERE id = $1`,
      [dispensaryId]
    );

    // Queue product_refresh task for normalization
    console.log(`[JaneProductDiscovery] Queuing product_refresh for payload ${payloadId}`);
    await taskService.createTask({
      role: 'product_refresh',
      dispensary_id: dispensaryId,
      platform: 'jane',
      // method undefined = any worker can process (product_refresh is local)
      priority: task.priority ?? 0,
      payload: { payload_id: payloadId },
    });

    return {
      success: true,
      productCount: result.products.length,
      payloadId,
      payloadSizeKB,
      storeInfo: result.store
        ? {
            id: result.store.id,
            name: result.store.name,
            productCount: result.store.product_count,
          }
        : null,
      queuedProductRefresh: true,
    };
  } catch (error: unknown) {
    const errorMessage = error instanceof Error ? error.message : 'Unknown error';
    console.error(`[JaneProductDiscovery] Error:`, errorMessage);

    // Count the failure and, after repeated failures, flag the dispensary as
    // 'failing'. Bookkeeping problems are logged inside recordFailure.
    await recordFailure(true);

    return {
      success: false,
      error: errorMessage,
    };
  }
}