- Add task completion verification with DB and output layers - Add reconciliation loop to sync worker memory with DB state - Implement IP-per-store-per-platform conflict detection - Add task ID hash to MinIO payload filenames for traceability - Fix schedule edit modal with dispensary info in API responses - Add task ID display after dispensary name in worker dashboard - Add migrations for proxy_ip and source tracking columns 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
168 lines
4.9 KiB
TypeScript
168 lines
4.9 KiB
TypeScript
/**
|
|
* Jane Product Discovery Handler
|
|
*
|
|
* Fetches all products from a Jane store via Puppeteer + network interception.
|
|
*
|
|
* Flow:
|
|
* 1. Load dispensary with platform_dispensary_id
|
|
* 2. Navigate to menu URL and capture Algolia product responses
|
|
* 3. Save raw payload to filesystem
|
|
* 4. Queue product_refresh task for normalization
|
|
*/
|
|
|
|
import { TaskContext, TaskResult } from '../task-worker';
|
|
import {
|
|
setCrawlRotator,
|
|
fetchProductsByStoreIdDirect,
|
|
} from '../../platforms/jane';
|
|
import { saveRawPayload } from '../../utils/payload-storage';
|
|
import { taskService } from '../task-service';
|
|
|
|
/**
 * Task handler that discovers the products of a Jane-backed dispensary.
 *
 * Steps, as implemented below:
 *  1. Load the dispensary row referenced by `task.dispensary_id`.
 *  2. Fetch the store's products with `fetchProductsByStoreIdDirect`, keyed by
 *     the row's `platform_dispensary_id`.
 *  3. Persist the raw product data through `saveRawPayload`.
 *  4. Mark the dispensary as `hydrating`, bump `consecutive_successes` and
 *     reset `consecutive_failures`.
 *  5. Queue a `product_refresh` task that points at the saved payload.
 *
 * Errors thrown inside the main flow are caught and reported as
 * `{ success: false, error }`. Recording that failure on the dispensary row is
 * best-effort: if the bookkeeping UPDATE itself fails, the problem is logged
 * and the original error is still the one returned to the caller.
 *
 * @param ctx - Worker context; `pool`, `task` and `crawlRotator` are read.
 * @returns A result whose `success` flag tells the worker whether discovery
 *   completed; on success it also carries the product count, payload id,
 *   payload size in KB and basic store info.
 */
export async function handleProductDiscoveryJane(ctx: TaskContext): Promise<TaskResult> {
  const { pool, task, crawlRotator } = ctx;
  const dispensaryId = task.dispensary_id;

  if (!dispensaryId) {
    return {
      success: false,
      error: 'Missing dispensary_id in task',
    };
  }

  console.log(`[JaneProductDiscovery] Starting for dispensary ${dispensaryId}`);

  try {
    // Load dispensary
    const dispResult = await pool.query(
      `SELECT id, name, menu_url, platform_dispensary_id, menu_type
       FROM dispensaries WHERE id = $1`,
      [dispensaryId]
    );

    if (dispResult.rows.length === 0) {
      return {
        success: false,
        error: `Dispensary ${dispensaryId} not found`,
      };
    }

    const dispensary = dispResult.rows[0];

    // The Jane store ID is the lookup key for the fetch below; without it
    // there is nothing to query.
    if (!dispensary.platform_dispensary_id) {
      return {
        success: false,
        error: `Dispensary ${dispensaryId} has no platform_dispensary_id (Jane store ID)`,
      };
    }

    console.log(`[JaneProductDiscovery] Fetching products for Jane store ${dispensary.platform_dispensary_id}`);

    // Attach crawl rotator (only when the worker supplied one)
    if (crawlRotator) {
      setCrawlRotator(crawlRotator);
    }

    // Fetch products directly via Algolia
    const result = await fetchProductsByStoreIdDirect(dispensary.platform_dispensary_id);

    if (result.products.length === 0) {
      console.warn(`[JaneProductDiscovery] No products captured for dispensary ${dispensaryId}`);

      // Update dispensary with failure. Unlike the catch path below, this
      // UPDATE leaves `stage` untouched.
      await pool.query(
        `UPDATE dispensaries
         SET consecutive_failures = consecutive_failures + 1,
             updated_at = NOW()
         WHERE id = $1`,
        [dispensaryId]
      );

      return {
        success: false,
        error: 'No products captured from Jane menu page',
        productCount: 0,
      };
    }

    console.log(`[JaneProductDiscovery] Captured ${result.products.length} products`);

    // Build payload for storage.
    // Store the raw Algolia hits for the normalizer.
    const rawPayload = {
      hits: result.products.map(p => p.raw), // Use raw product data
      store: result.store?.raw || null,
      capturedAt: new Date().toISOString(),
      platform: 'jane',
      dispensaryId,
      storeId: dispensary.platform_dispensary_id,
    };

    // Save raw payload (platform = 'jane')
    const { id: payloadId, sizeBytes } = await saveRawPayload(
      pool,
      dispensaryId,
      rawPayload,
      null, // crawl_run_id
      result.products.length,
      'jane', // platform
      task.id // task ID for traceability
    );

    // Computed once; used for both the log line and the returned result.
    const payloadSizeKB = Math.round(sizeBytes / 1024);

    console.log(`[JaneProductDiscovery] Saved payload ${payloadId} (${payloadSizeKB}KB)`);

    // Update dispensary stage and timestamps
    await pool.query(
      `UPDATE dispensaries
       SET stage = 'hydrating',
           last_fetch_at = NOW(),
           consecutive_successes = consecutive_successes + 1,
           consecutive_failures = 0,
           updated_at = NOW()
       WHERE id = $1`,
      [dispensaryId]
    );

    // Queue product_refresh task for normalization
    console.log(`[JaneProductDiscovery] Queuing product_refresh for payload ${payloadId}`);
    await taskService.createTask({
      role: 'product_refresh',
      dispensary_id: dispensaryId,
      platform: 'jane',
      // method undefined = any worker can process (product_refresh is local)
      priority: task.priority ?? 0,
      payload: { payload_id: payloadId },
    });

    return {
      success: true,
      productCount: result.products.length,
      payloadId,
      payloadSizeKB,
      storeInfo: result.store
        ? {
            id: result.store.id,
            name: result.store.name,
            productCount: result.store.product_count,
          }
        : null,
      queuedProductRefresh: true,
    };
  } catch (error: unknown) {
    const errorMessage = error instanceof Error ? error.message : 'Unknown error';
    console.error(`[JaneProductDiscovery] Error:`, errorMessage);

    // Update dispensary with failure. This is best-effort bookkeeping: a
    // failure here must not mask the original error, but it is logged rather
    // than silently dropped so a broken DB connection is visible.
    // NOTE(review): presumably PostgreSQL, where the CASE reads the pre-update
    // counter, so `failing` is set on the third consecutive failure — confirm.
    try {
      await pool.query(
        `UPDATE dispensaries
         SET consecutive_failures = consecutive_failures + 1,
             stage = CASE WHEN consecutive_failures >= 2 THEN 'failing' ELSE stage END,
             updated_at = NOW()
         WHERE id = $1`,
        [dispensaryId]
      );
    } catch (updateError: unknown) {
      const updateMessage = updateError instanceof Error ? updateError.message : 'Unknown error';
      console.error(
        `[JaneProductDiscovery] Failed to record failure for dispensary ${dispensaryId}:`,
        updateMessage
      );
    }

    return {
      success: false,
      error: errorMessage,
    };
  }
}
|