chore: Clean up deprecated code and docs
- Move deprecated directories to src/_deprecated/: - hydration/ (old pipeline approach) - scraper-v2/ (old Puppeteer scraper) - canonical-hydration/ (merged into tasks) - Unused services: availability, crawler-logger, geolocation, etc - Unused utils: age-gate-playwright, HomepageValidator, stealthBrowser - Archive outdated docs to docs/_archive/: - ANALYTICS_RUNBOOK.md - ANALYTICS_V2_EXAMPLES.md - BRAND_INTELLIGENCE_API.md - CRAWL_PIPELINE.md - TASK_WORKFLOW_2024-12-10.md - WORKER_TASK_ARCHITECTURE.md - ORGANIC_SCRAPING_GUIDE.md - Add docs/CODEBASE_MAP.md as single source of truth - Add warning files to deprecated/archived directories - Slim down CLAUDE.md to essential rules only 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
363
backend/src/_deprecated/services/crawler-profiles.ts
Normal file
363
backend/src/_deprecated/services/crawler-profiles.ts
Normal file
@@ -0,0 +1,363 @@
|
||||
/**
|
||||
* Crawler Profiles Service
|
||||
*
|
||||
* Manages per-store crawler configuration profiles.
|
||||
* This service handles CRUD operations for dispensary_crawler_profiles
|
||||
* and provides helper functions for loading active profiles.
|
||||
*
|
||||
* Phase 1: Basic profile loading for Dutchie production crawls only.
|
||||
*/
|
||||
|
||||
import { pool } from '../db/pool';
|
||||
import {
|
||||
DispensaryCrawlerProfile,
|
||||
DispensaryCrawlerProfileCreate,
|
||||
DispensaryCrawlerProfileUpdate,
|
||||
CrawlerProfileOptions,
|
||||
} from '../types';
|
||||
|
||||
// ============================================================
|
||||
// Database Row Mapping
|
||||
// ============================================================
|
||||
|
||||
/**
 * Translate a `dispensary_crawler_profiles` row (snake_case columns) into the
 * camelCase `DispensaryCrawlerProfile` shape used by this service.
 *
 * A falsy `config` value (for example NULL) is replaced by an empty object;
 * every other column is copied across unchanged.
 *
 * @param row - Raw row object taken from a query result
 * @returns The same data keyed by camelCase property names
 */
function mapDbRowToProfile(row: any): DispensaryCrawlerProfile {
  const {
    id,
    dispensary_id: dispensaryId,
    profile_name: profileName,
    crawler_type: crawlerType,
    profile_key: profileKey,
    config,
    timeout_ms: timeoutMs,
    download_images: downloadImages,
    track_stock: trackStock,
    version,
    enabled,
    created_at: createdAt,
    updated_at: updatedAt,
  } = row;

  return {
    id,
    dispensaryId,
    profileName,
    crawlerType,
    profileKey,
    // Any falsy config collapses to an empty object.
    config: config || {},
    timeoutMs,
    downloadImages,
    trackStock,
    version,
    enabled,
    createdAt,
    updatedAt,
  };
}
|
||||
|
||||
// ============================================================
|
||||
// Profile Retrieval
|
||||
// ============================================================
|
||||
|
||||
/**
 * Resolve which crawler profile should be used for a dispensary.
 *
 * Lookup proceeds in this order:
 *   1. The enabled profile referenced by `dispensaries.active_crawler_profile_id`.
 *   2. Otherwise, the newest (by `created_at`) enabled profile owned by the
 *      dispensary whose `crawler_type` equals the effective crawler type. The
 *      effective type is the `crawlerType` argument, else the dispensary's
 *      `menu_type`, else `'dutchie'` (the Phase 1 default).
 *   3. `null` when neither lookup yields a row.
 *
 * `crawlerType` only influences step 2: a pinned profile found in step 1 is
 * returned without comparing its type.
 *
 * @param dispensaryId - ID of the dispensary to resolve a profile for
 * @param crawlerType - Optional crawler type used by the fallback lookup
 * @returns The resolved profile, or `null` if none matches
 */
export async function getActiveCrawlerProfileForDispensary(
  dispensaryId: number,
  crawlerType?: string
): Promise<DispensaryCrawlerProfile | null> {
  // Step 1: an explicitly pinned profile wins, provided it is still enabled.
  const { rows: pinnedRows } = await pool.query(
    `SELECT dcp.*
     FROM dispensary_crawler_profiles dcp
     INNER JOIN dispensaries d ON d.active_crawler_profile_id = dcp.id
     WHERE d.id = $1 AND dcp.enabled = true`,
    [dispensaryId]
  );
  if (pinnedRows.length > 0) {
    return mapDbRowToProfile(pinnedRows[0]);
  }

  // Step 2: work out which crawler type the fallback lookup should match.
  // The dispensary's menu_type is only consulted when the caller gave no type.
  let resolvedType = crawlerType;
  if (!resolvedType) {
    const { rows: dispensaryRows } = await pool.query(
      `SELECT menu_type FROM dispensaries WHERE id = $1`,
      [dispensaryId]
    );
    const menuType = dispensaryRows.length > 0 ? dispensaryRows[0].menu_type : undefined;
    if (menuType) {
      resolvedType = menuType;
    }
  }
  // Still nothing usable: Phase 1 only crawls Dutchie, so assume that.
  const typeToMatch = resolvedType || 'dutchie';

  const { rows: candidateRows } = await pool.query(
    `SELECT * FROM dispensary_crawler_profiles
     WHERE dispensary_id = $1
       AND crawler_type = $2
       AND enabled = true
     ORDER BY created_at DESC
     LIMIT 1`,
    [dispensaryId, typeToMatch]
  );

  return candidateRows.length > 0 ? mapDbRowToProfile(candidateRows[0]) : null;
}
|
||||
|
||||
/**
 * List every crawler profile owned by a dispensary, newest first.
 *
 * The query does not filter on `enabled`, so disabled profiles are included.
 *
 * @param dispensaryId - ID of the dispensary whose profiles are wanted
 * @returns Mapped profiles ordered by `created_at` descending (possibly empty)
 */
export async function getProfilesForDispensary(
  dispensaryId: number
): Promise<DispensaryCrawlerProfile[]> {
  const { rows } = await pool.query(
    `SELECT * FROM dispensary_crawler_profiles
     WHERE dispensary_id = $1
     ORDER BY created_at DESC`,
    [dispensaryId]
  );

  const profiles: DispensaryCrawlerProfile[] = [];
  for (const row of rows) {
    profiles.push(mapDbRowToProfile(row));
  }
  return profiles;
}
|
||||
|
||||
/**
 * Load a single crawler profile by its primary key.
 *
 * @param profileId - ID of the profile row
 * @returns The mapped profile, or `null` when no row has that ID
 */
export async function getProfileById(
  profileId: number
): Promise<DispensaryCrawlerProfile | null> {
  const { rows } = await pool.query(
    `SELECT * FROM dispensary_crawler_profiles WHERE id = $1`,
    [profileId]
  );

  return rows.length === 0 ? null : mapDbRowToProfile(rows[0]);
}
|
||||
|
||||
// ============================================================
|
||||
// Profile Creation & Update
|
||||
// ============================================================
|
||||
|
||||
/**
 * Insert a new crawler profile row and return it as stored.
 *
 * Optional fields that are `null`/`undefined` fall back to these values:
 * `profileKey` -> NULL, `config` -> `{}`, `timeoutMs` -> 30000,
 * `downloadImages` -> true, `trackStock` -> true, `version` -> 1,
 * `enabled` -> true. `config` is sent to the database as a JSON string.
 *
 * @param profile - Field values for the new profile
 * @returns The inserted row (via `RETURNING *`) mapped to a profile
 */
export async function createCrawlerProfile(
  profile: DispensaryCrawlerProfileCreate
): Promise<DispensaryCrawlerProfile> {
  const insertSql = `INSERT INTO dispensary_crawler_profiles (
      dispensary_id, profile_name, crawler_type, profile_key,
      config, timeout_ms, download_images, track_stock, version, enabled
    ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
    RETURNING *`;

  // Positional parameters, in the same order as the column list above.
  const serializedConfig = JSON.stringify(profile.config ?? {});
  const params = [
    profile.dispensaryId,
    profile.profileName,
    profile.crawlerType,
    profile.profileKey ?? null,
    serializedConfig,
    profile.timeoutMs ?? 30000,
    profile.downloadImages ?? true,
    profile.trackStock ?? true,
    profile.version ?? 1,
    profile.enabled ?? true,
  ];

  const { rows } = await pool.query(insertSql, params);
  return mapDbRowToProfile(rows[0]);
}
|
||||
|
||||
/**
 * Apply a partial update to an existing crawler profile.
 *
 * Only fields whose value is not `undefined` are written; a field explicitly
 * set to `null` IS written. `config` is serialized with `JSON.stringify`
 * before being sent. When no field is supplied, no UPDATE is issued and the
 * current row is returned via `getProfileById`.
 *
 * @param profileId - ID of the profile to modify
 * @param updates - Fields to change
 * @returns The updated profile, or `null` when no row has that ID
 */
export async function updateCrawlerProfile(
  profileId: number,
  updates: DispensaryCrawlerProfileUpdate
): Promise<DispensaryCrawlerProfile | null> {
  // Updatable fields paired with their column names. The order of this list
  // fixes the order of the SET clauses and therefore the placeholder numbers.
  const updatableColumns: ReadonlyArray<{
    field: keyof DispensaryCrawlerProfileUpdate;
    column: string;
  }> = [
    { field: 'profileName', column: 'profile_name' },
    { field: 'crawlerType', column: 'crawler_type' },
    { field: 'profileKey', column: 'profile_key' },
    { field: 'config', column: 'config' },
    { field: 'timeoutMs', column: 'timeout_ms' },
    { field: 'downloadImages', column: 'download_images' },
    { field: 'trackStock', column: 'track_stock' },
    { field: 'version', column: 'version' },
    { field: 'enabled', column: 'enabled' },
  ];

  const assignments: string[] = [];
  const params: unknown[] = [];

  for (const { field, column } of updatableColumns) {
    const value = updates[field];
    if (value === undefined) {
      continue;
    }
    // Array#push returns the new length, which is the 1-based placeholder index.
    const placeholder = params.push(field === 'config' ? JSON.stringify(value) : value);
    assignments.push(`${column} = $${placeholder}`);
  }

  if (assignments.length === 0) {
    // Nothing to write; hand back the row as it currently stands.
    return getProfileById(profileId);
  }

  const idPlaceholder = params.push(profileId);

  const { rows } = await pool.query(
    `UPDATE dispensary_crawler_profiles
     SET ${assignments.join(', ')}
     WHERE id = $${idPlaceholder}
     RETURNING *`,
    params
  );

  return rows.length === 0 ? null : mapDbRowToProfile(rows[0]);
}
|
||||
|
||||
/**
 * Permanently remove a crawler profile.
 *
 * Any dispensary whose `active_crawler_profile_id` points at the profile is
 * reset to NULL first, then the profile row itself is deleted. For a soft
 * delete, call `updateCrawlerProfile` with `enabled: false` instead.
 *
 * NOTE(review): the two statements are issued as separate `pool.query` calls,
 * so they do not appear to share a transaction. If the DELETE fails, the
 * references have already been cleared; confirm whether that is acceptable.
 *
 * @param profileId - ID of the profile to delete
 * @returns `true` if a profile row was deleted, `false` if none matched
 */
export async function deleteCrawlerProfile(profileId: number): Promise<boolean> {
  // Detach the profile from every dispensary that has it pinned as active.
  const detachSql = `UPDATE dispensaries SET active_crawler_profile_id = NULL
     WHERE active_crawler_profile_id = $1`;
  await pool.query(detachSql, [profileId]);

  const deleteSql = `DELETE FROM dispensary_crawler_profiles WHERE id = $1`;
  const { rowCount } = await pool.query(deleteSql, [profileId]);

  // A missing rowCount is treated as zero rows affected.
  const deletedRows = rowCount ?? 0;
  return deletedRows > 0;
}
|
||||
|
||||
// ============================================================
|
||||
// Active Profile Management
|
||||
// ============================================================
|
||||
|
||||
/**
 * Pin a profile as the active crawler profile of a dispensary.
 *
 * The profile is validated before `dispensaries.active_crawler_profile_id`
 * is written.
 *
 * @param dispensaryId - Dispensary that should use the profile
 * @param profileId - Profile to pin
 * @throws Error if the profile does not exist, belongs to a different
 *   dispensary, or is not enabled (checked in that order)
 */
export async function setActiveCrawlerProfile(
  dispensaryId: number,
  profileId: number
): Promise<void> {
  const candidate = await getProfileById(profileId);

  // Work out the first validation failure, if any; only one is ever reported.
  let rejection: string | null = null;
  if (!candidate) {
    rejection = `Profile ${profileId} not found`;
  } else if (candidate.dispensaryId !== dispensaryId) {
    rejection = `Profile ${profileId} does not belong to dispensary ${dispensaryId}`;
  } else if (!candidate.enabled) {
    rejection = `Profile ${profileId} is not enabled`;
  }
  if (rejection !== null) {
    throw new Error(rejection);
  }

  const pinSql = `UPDATE dispensaries SET active_crawler_profile_id = $1 WHERE id = $2`;
  await pool.query(pinSql, [profileId, dispensaryId]);
}
|
||||
|
||||
/**
 * Unpin whatever profile is currently active for a dispensary by setting
 * `dispensaries.active_crawler_profile_id` to NULL.
 *
 * @param dispensaryId - Dispensary whose active profile should be cleared
 */
export async function clearActiveCrawlerProfile(dispensaryId: number): Promise<void> {
  const unpinSql = `UPDATE dispensaries SET active_crawler_profile_id = NULL WHERE id = $1`;
  await pool.query(unpinSql, [dispensaryId]);
}
|
||||
|
||||
// ============================================================
|
||||
// Helper Functions
|
||||
// ============================================================
|
||||
|
||||
/**
 * Reduce a stored profile to the runtime options handed to the crawler.
 *
 * A `null`/`undefined` timeout becomes 30000 ms; the other three fields are
 * passed through untouched.
 *
 * @param profile - Profile to derive options from
 * @returns Options containing `timeoutMs`, `downloadImages`, `trackStock` and `config`
 */
export function profileToOptions(profile: DispensaryCrawlerProfile): CrawlerProfileOptions {
  const { timeoutMs, downloadImages, trackStock, config } = profile;

  return {
    timeoutMs: timeoutMs ?? 30000,
    downloadImages,
    trackStock,
    config,
  };
}
|
||||
|
||||
/**
 * Build the options used when a dispensary has no profile configured:
 * a 30000 ms timeout, image download and stock tracking both on, and an
 * empty config.
 *
 * @returns A freshly allocated options object on every call
 */
export function getDefaultCrawlerOptions(): CrawlerProfileOptions {
  const defaults: CrawlerProfileOptions = {
    timeoutMs: 30000,
    downloadImages: true,
    trackStock: true,
    config: {},
  };
  return defaults;
}
|
||||
|
||||
/**
 * Report whether at least one crawler profile row (enabled or not) exists
 * for a dispensary.
 *
 * @param dispensaryId - Dispensary to check
 * @returns The `has_profiles` value from the EXISTS query, or `false` when
 *   the result row or that value is missing
 */
export async function dispensaryHasProfiles(dispensaryId: number): Promise<boolean> {
  const { rows } = await pool.query(
    `SELECT EXISTS(SELECT 1 FROM dispensary_crawler_profiles WHERE dispensary_id = $1) as has_profiles`,
    [dispensaryId]
  );
  const firstRow = rows[0];
  return firstRow?.has_profiles ?? false;
}
|
||||
|
||||
/**
 * Count enabled profiles per crawler type.
 *
 * @returns One entry per `crawler_type`, ordered by count descending. The
 *   count column is converted to a number with `parseInt(..., 10)`.
 */
export async function getProfileStats(): Promise<{ crawlerType: string; count: number }[]> {
  const { rows } = await pool.query(
    `SELECT crawler_type, COUNT(*) as count
     FROM dispensary_crawler_profiles
     WHERE enabled = true
     GROUP BY crawler_type
     ORDER BY count DESC`
  );

  const stats: { crawlerType: string; count: number }[] = [];
  for (const { crawler_type: crawlerType, count } of rows) {
    stats.push({ crawlerType, count: parseInt(count, 10) });
  }
  return stats;
}
|
||||
Reference in New Issue
Block a user