/**
 * Crawler Profiles Service
 *
 * Manages per-store crawler configuration profiles.
 * This service handles CRUD operations for dispensary_crawler_profiles
 * and provides helper functions for loading active profiles.
 *
 * Phase 1: Basic profile loading for Dutchie production crawls only.
 */

import { pool } from '../db/pool';
import {
  DispensaryCrawlerProfile,
  DispensaryCrawlerProfileCreate,
  DispensaryCrawlerProfileUpdate,
  CrawlerProfileOptions,
} from '../types';

/** Timeout in milliseconds used whenever a profile does not supply its own. */
const DEFAULT_TIMEOUT_MS = 30000;

/** Crawler type used when neither the caller nor the dispensary row provides one (Phase 1). */
const DEFAULT_CRAWLER_TYPE = 'dutchie';

// ============================================================
// Database Row Mapping
// ============================================================

/**
 * Map a database row (snake_case columns) to the TypeScript interface (camelCase).
 *
 * A falsy `config` column (e.g. NULL) is replaced with an empty object; all
 * other columns are copied through unchanged.
 *
 * @param row - A row selected from dispensary_crawler_profiles. Typed as `any`
 *   because the row shape is not declared in this file.
 * @returns The row expressed as a DispensaryCrawlerProfile.
 */
function mapDbRowToProfile(row: any): DispensaryCrawlerProfile {
  return {
    id: row.id,
    dispensaryId: row.dispensary_id,
    profileName: row.profile_name,
    crawlerType: row.crawler_type,
    profileKey: row.profile_key,
    config: row.config || {},
    timeoutMs: row.timeout_ms,
    downloadImages: row.download_images,
    trackStock: row.track_stock,
    version: row.version,
    enabled: row.enabled,
    createdAt: row.created_at,
    updatedAt: row.updated_at,
  };
}

// ============================================================
// Profile Retrieval
// ============================================================

/**
 * Get the active crawler profile for a dispensary.
 *
 * Resolution order:
 * 1. If dispensaries.active_crawler_profile_id is set, load that profile (if enabled).
 *    This step does not apply the `crawlerType` filter.
 * 2. Otherwise, find the most recently created enabled profile for the dispensary
 *    whose crawler_type matches, in order of preference: the `crawlerType`
 *    argument, the dispensary's menu_type, or 'dutchie'.
 * 3. Returns null if no matching profile exists.
 *
 * @param dispensaryId - The dispensary ID to look up
 * @param crawlerType - Optional: crawler type for the fallback lookup
 *   (defaults to the dispensary's menu_type). An empty string is treated as
 *   "not specified".
 * @returns The resolved profile, or null when none matches.
 */
export async function getActiveCrawlerProfileForDispensary(
  dispensaryId: number,
  crawlerType?: string
): Promise<DispensaryCrawlerProfile | null> {
  // First, check if there's an explicit active_crawler_profile_id set
  const activeProfileResult = await pool.query(
    `SELECT dcp.*
     FROM dispensary_crawler_profiles dcp
     INNER JOIN dispensaries d ON d.active_crawler_profile_id = dcp.id
     WHERE d.id = $1 AND dcp.enabled = true`,
    [dispensaryId]
  );

  if (activeProfileResult.rows.length > 0) {
    return mapDbRowToProfile(activeProfileResult.rows[0]);
  }

  // No explicit active profile - fall back to the most recent enabled profile.
  // If crawlerType is not specified, try to match the dispensary's menu_type.
  let effectiveCrawlerType = crawlerType;

  if (!effectiveCrawlerType) {
    const dispensaryResult = await pool.query(
      `SELECT menu_type FROM dispensaries WHERE id = $1`,
      [dispensaryId]
    );

    if (dispensaryResult.rows.length > 0 && dispensaryResult.rows[0].menu_type) {
      effectiveCrawlerType = dispensaryResult.rows[0].menu_type;
    }
  }

  // If we still don't have a crawler type, use the Phase 1 default
  if (!effectiveCrawlerType) {
    effectiveCrawlerType = DEFAULT_CRAWLER_TYPE;
  }

  const fallbackResult = await pool.query(
    `SELECT *
     FROM dispensary_crawler_profiles
     WHERE dispensary_id = $1 AND crawler_type = $2 AND enabled = true
     ORDER BY created_at DESC
     LIMIT 1`,
    [dispensaryId, effectiveCrawlerType]
  );

  if (fallbackResult.rows.length > 0) {
    return mapDbRowToProfile(fallbackResult.rows[0]);
  }

  return null;
}

/**
 * Get all profiles for a dispensary, enabled or not, newest first.
 *
 * @param dispensaryId - The dispensary whose profiles to list
 * @returns Every matching profile ordered by created_at descending
 *   (empty array when there are none).
 */
export async function getProfilesForDispensary(
  dispensaryId: number
): Promise<DispensaryCrawlerProfile[]> {
  const result = await pool.query(
    `SELECT *
     FROM dispensary_crawler_profiles
     WHERE dispensary_id = $1
     ORDER BY created_at DESC`,
    [dispensaryId]
  );

  return result.rows.map(mapDbRowToProfile);
}

/**
 * Get a profile by ID.
 *
 * @param profileId - Primary key of the profile
 * @returns The profile, or null when no row has that id.
 */
export async function getProfileById(
  profileId: number
): Promise<DispensaryCrawlerProfile | null> {
  const result = await pool.query(
    `SELECT * FROM dispensary_crawler_profiles WHERE id = $1`,
    [profileId]
  );

  if (result.rows.length === 0) {
    return null;
  }

  return mapDbRowToProfile(result.rows[0]);
}

// ============================================================
// Profile Creation & Update
// ============================================================

/**
 * Create a new crawler profile.
 *
 * Omitted optional fields fall back to: profileKey = null, config = {},
 * timeoutMs = 30000, downloadImages = true, trackStock = true, version = 1,
 * enabled = true. `config` is serialized with JSON.stringify before insert.
 *
 * @param profile - Values for the new profile
 * @returns The inserted row as a profile.
 */
export async function createCrawlerProfile(
  profile: DispensaryCrawlerProfileCreate
): Promise<DispensaryCrawlerProfile> {
  const result = await pool.query(
    `INSERT INTO dispensary_crawler_profiles (
       dispensary_id, profile_name, crawler_type, profile_key, config,
       timeout_ms, download_images, track_stock, version, enabled
     ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
     RETURNING *`,
    [
      profile.dispensaryId,
      profile.profileName,
      profile.crawlerType,
      profile.profileKey ?? null,
      JSON.stringify(profile.config ?? {}),
      profile.timeoutMs ?? DEFAULT_TIMEOUT_MS,
      profile.downloadImages ?? true,
      profile.trackStock ?? true,
      profile.version ?? 1,
      profile.enabled ?? true,
    ]
  );

  return mapDbRowToProfile(result.rows[0]);
}

/**
 * Update an existing profile.
 *
 * Only fields that are not `undefined` in `updates` are written; a field
 * explicitly set to `null` is written as NULL. When no field is supplied the
 * current row is returned unchanged.
 *
 * @param profileId - Primary key of the profile to update
 * @param updates - Partial set of fields to change
 * @returns The updated profile, or null when no row has that id.
 */
export async function updateCrawlerProfile(
  profileId: number,
  updates: DispensaryCrawlerProfileUpdate
): Promise<DispensaryCrawlerProfile | null> {
  // Build the SET list dynamically. Placeholders are numbered from the length
  // of `values`, so they always line up with the parameter array.
  const setClauses: string[] = [];
  const values: unknown[] = [];

  const assign = (column: string, value: unknown): void => {
    values.push(value);
    setClauses.push(`${column} = $${values.length}`);
  };

  if (updates.profileName !== undefined) {
    assign('profile_name', updates.profileName);
  }
  if (updates.crawlerType !== undefined) {
    assign('crawler_type', updates.crawlerType);
  }
  if (updates.profileKey !== undefined) {
    assign('profile_key', updates.profileKey);
  }
  if (updates.config !== undefined) {
    // config is sent as serialized JSON text, matching createCrawlerProfile
    assign('config', JSON.stringify(updates.config));
  }
  if (updates.timeoutMs !== undefined) {
    assign('timeout_ms', updates.timeoutMs);
  }
  if (updates.downloadImages !== undefined) {
    assign('download_images', updates.downloadImages);
  }
  if (updates.trackStock !== undefined) {
    assign('track_stock', updates.trackStock);
  }
  if (updates.version !== undefined) {
    assign('version', updates.version);
  }
  if (updates.enabled !== undefined) {
    assign('enabled', updates.enabled);
  }

  if (setClauses.length === 0) {
    // Nothing to update
    return getProfileById(profileId);
  }

  // The profile id is always the last parameter
  values.push(profileId);

  const result = await pool.query(
    `UPDATE dispensary_crawler_profiles
     SET ${setClauses.join(', ')}
     WHERE id = $${values.length}
     RETURNING *`,
    values
  );

  if (result.rows.length === 0) {
    return null;
  }

  return mapDbRowToProfile(result.rows[0]);
}

/**
 * Delete a profile (hard delete - use updateCrawlerProfile with enabled=false
 * for soft delete).
 *
 * Any dispensaries.active_crawler_profile_id pointing at the profile is set
 * to NULL first.
 *
 * NOTE(review): the two statements are issued as separate pool.query calls,
 * not inside one transaction - confirm this is acceptable for callers.
 *
 * @param profileId - Primary key of the profile to delete
 * @returns true when a profile row was deleted, false when none matched.
 */
export async function deleteCrawlerProfile(profileId: number): Promise<boolean> {
  // First clear any active_crawler_profile_id references
  await pool.query(
    `UPDATE dispensaries
     SET active_crawler_profile_id = NULL
     WHERE active_crawler_profile_id = $1`,
    [profileId]
  );

  const result = await pool.query(
    `DELETE FROM dispensary_crawler_profiles WHERE id = $1`,
    [profileId]
  );

  return (result.rowCount ?? 0) > 0;
}

// ============================================================
// Active Profile Management
// ============================================================

/**
 * Set the active crawler profile for a dispensary.
 *
 * @param dispensaryId - The dispensary to update
 * @param profileId - The profile to mark as active
 * @throws Error when the profile does not exist, belongs to a different
 *   dispensary, or is not enabled.
 */
export async function setActiveCrawlerProfile(
  dispensaryId: number,
  profileId: number
): Promise<void> {
  // Verify the profile belongs to this dispensary and is enabled
  const profile = await getProfileById(profileId);

  if (!profile) {
    throw new Error(`Profile ${profileId} not found`);
  }

  if (profile.dispensaryId !== dispensaryId) {
    throw new Error(`Profile ${profileId} does not belong to dispensary ${dispensaryId}`);
  }

  if (!profile.enabled) {
    throw new Error(`Profile ${profileId} is not enabled`);
  }

  await pool.query(
    `UPDATE dispensaries SET active_crawler_profile_id = $1 WHERE id = $2`,
    [profileId, dispensaryId]
  );
}

/**
 * Clear the active crawler profile for a dispensary.
 *
 * @param dispensaryId - The dispensary whose active_crawler_profile_id is set to NULL
 */
export async function clearActiveCrawlerProfile(dispensaryId: number): Promise<void> {
  await pool.query(
    `UPDATE dispensaries SET active_crawler_profile_id = NULL WHERE id = $1`,
    [dispensaryId]
  );
}

// ============================================================
// Helper Functions
// ============================================================

/**
 * Convert a profile to runtime options for the crawler.
 *
 * @param profile - The stored profile
 * @returns Options copied from the profile; timeoutMs falls back to 30000
 *   when the profile's value is null or undefined.
 */
export function profileToOptions(profile: DispensaryCrawlerProfile): CrawlerProfileOptions {
  return {
    timeoutMs: profile.timeoutMs ?? DEFAULT_TIMEOUT_MS,
    downloadImages: profile.downloadImages,
    trackStock: profile.trackStock,
    config: profile.config,
  };
}

/**
 * Get default options when no profile is configured.
 *
 * @returns A fresh options object: 30000 ms timeout, image download and stock
 *   tracking on, empty config.
 */
export function getDefaultCrawlerOptions(): CrawlerProfileOptions {
  return {
    timeoutMs: DEFAULT_TIMEOUT_MS,
    downloadImages: true,
    trackStock: true,
    config: {},
  };
}

/**
 * Check if a dispensary has any profiles (enabled or not).
 *
 * @param dispensaryId - The dispensary to check
 * @returns true when at least one profile row exists for the dispensary.
 */
export async function dispensaryHasProfiles(dispensaryId: number): Promise<boolean> {
  const result = await pool.query(
    `SELECT EXISTS(
       SELECT 1 FROM dispensary_crawler_profiles WHERE dispensary_id = $1
     ) as has_profiles`,
    [dispensaryId]
  );

  return result.rows[0]?.has_profiles ?? false;
}

/**
 * Get counts of enabled profiles by crawler type.
 *
 * @returns One entry per crawler_type, ordered by count descending.
 */
export async function getProfileStats(): Promise<{ crawlerType: string; count: number }[]> {
  const result = await pool.query(
    `SELECT crawler_type, COUNT(*) as count
     FROM dispensary_crawler_profiles
     WHERE enabled = true
     GROUP BY crawler_type
     ORDER BY count DESC`
  );

  // `count` is run through parseInt to guarantee a number in the result
  return result.rows.map(row => ({
    crawlerType: row.crawler_type,
    count: parseInt(row.count, 10),
  }));
}