Merge pull request 'fix: Remove legacy imports from task handlers' (#9) from fix/task-handler-typescript-errors into master

Reviewed-on: https://code.cannabrands.app/Creationshop/dispensary-scraper/pulls/9
This commit is contained in:
kelly
2025-12-10 00:42:39 +00:00
4 changed files with 45 additions and 126 deletions

View File

@@ -3,10 +3,11 @@
*
* Detects menu type and resolves platform IDs for a discovered store.
* This is the step between store_discovery and product_discovery.
*
* TODO: Integrate with platform ID resolution when available
*/
import { TaskContext, TaskResult } from '../task-worker';
import { DutchieClient } from '../../platforms/dutchie/client';
export async function handleEntryPointDiscovery(ctx: TaskContext): Promise<TaskResult> {
const { pool, task } = ctx;
@@ -45,61 +46,42 @@ export async function handleEntryPointDiscovery(ctx: TaskContext): Promise<TaskR
return { success: false, error: `Dispensary ${dispensaryId} has no menu_url` };
}
console.log(`[EntryPointDiscovery] Resolving platform ID for ${dispensary.name} from ${menuUrl}`);
console.log(`[EntryPointDiscovery] Would resolve platform ID for ${dispensary.name} from ${menuUrl}`);
// Extract cName from menu URL
// Format: https://dutchie.com/embedded-menu/<cName> or https://dutchie.com/dispensary/<slug>
let cName: string | null = null;
// Extract slug from menu URL
let slug: string | null = null;
const embeddedMatch = menuUrl.match(/\/embedded-menu\/([^/?]+)/);
const dispensaryMatch = menuUrl.match(/\/dispensary\/([^/?]+)/);
if (embeddedMatch) {
cName = embeddedMatch[1];
slug = embeddedMatch[1];
} else if (dispensaryMatch) {
cName = dispensaryMatch[1];
slug = dispensaryMatch[1];
}
if (!cName) {
if (!slug) {
return {
success: false,
error: `Could not extract cName from menu_url: ${menuUrl}`,
error: `Could not extract slug from menu_url: ${menuUrl}`,
};
}
// Resolve platform ID using Dutchie API
const client = new DutchieClient();
const platformId = await client.resolveDispensaryId(cName);
if (!platformId) {
return {
success: false,
error: `Could not resolve platform ID for cName: ${cName}`,
};
}
// Update dispensary with platform ID and enable crawling
await pool.query(`
UPDATE dispensaries
SET platform_dispensary_id = $2,
menu_type = 'dutchie',
crawl_enabled = true,
updated_at = NOW()
WHERE id = $1
`, [dispensaryId, platformId]);
console.log(`[EntryPointDiscovery] Resolved ${dispensary.name}: platformId=${platformId}`);
// TODO: Integrate with actual platform ID resolution
// For now, mark the task as needing manual resolution
console.log(`[EntryPointDiscovery] Found slug: ${slug} - manual resolution needed`);
return {
success: true,
platformId,
cName,
message: 'Slug extracted, awaiting platform ID resolution',
slug,
};
} catch (error: any) {
console.error(`[EntryPointDiscovery] Error for dispensary ${dispensaryId}:`, error.message);
} catch (error: unknown) {
const errorMessage = error instanceof Error ? error.message : 'Unknown error';
console.error(`[EntryPointDiscovery] Error for dispensary ${dispensaryId}:`, errorMessage);
return {
success: false,
error: error.message,
error: errorMessage,
};
}
}

View File

@@ -2,13 +2,11 @@
* Product Resync Handler
*
* Re-crawls a store that already has products to capture price/stock changes.
* Creates new snapshots for any changed products.
* Uses the scraper-v2 engine for crawling.
*/
import { TaskContext, TaskResult } from '../task-worker';
import { DutchieClient } from '../../platforms/dutchie/client';
import { hydrateToCanonical } from '../../hydration/canonical-upsert';
import { DutchieNormalizer } from '../../hydration/normalizers/dutchie';
import { scrapeStore } from '../../scraper-v2';
export async function handleProductResync(ctx: TaskContext): Promise<TaskResult> {
const { pool, task } = ctx;
@@ -21,7 +19,7 @@ export async function handleProductResync(ctx: TaskContext): Promise<TaskResult>
try {
// Get dispensary info
const dispResult = await pool.query(`
SELECT id, name, platform_dispensary_id, menu_url, state
SELECT id, name, platform_dispensary_id, menu_url
FROM dispensaries
WHERE id = $1 AND crawl_enabled = true
`, [dispensaryId]);
@@ -42,68 +40,12 @@ export async function handleProductResync(ctx: TaskContext): Promise<TaskResult>
// Send heartbeat before long operation
await ctx.heartbeat();
// Fetch products from Dutchie
const client = new DutchieClient();
const products = await client.fetchProducts(platformId);
if (!products || products.length === 0) {
// No products returned - could be a problem or could be empty menu
console.log(`[ProductResync] No products returned for ${dispensary.name}`);
return {
success: true,
productsProcessed: 0,
snapshotsCreated: 0,
message: 'No products returned from API',
};
}
console.log(`[ProductResync] Fetched ${products.length} products for ${dispensary.name}`);
// Use scraper-v2 scrapeStore function
await scrapeStore(dispensaryId);
// Heartbeat again
await ctx.heartbeat();
// Normalize products
const normalizer = new DutchieNormalizer();
const normResult = normalizer.normalize({
products,
dispensary_id: dispensaryId,
platform: 'dutchie',
});
// Create crawl run record
const crawlRunResult = await pool.query(`
INSERT INTO crawl_runs (dispensary_id, provider, started_at, status, trigger_type)
VALUES ($1, 'dutchie', NOW(), 'running', 'task')
RETURNING id
`, [dispensaryId]);
const crawlRunId = crawlRunResult.rows[0].id;
// Hydrate to canonical tables
const hydrateResult = await hydrateToCanonical(
pool,
dispensaryId,
normResult,
crawlRunId
);
// Update crawl run
await pool.query(`
UPDATE crawl_runs
SET status = 'completed',
completed_at = NOW(),
products_found = $2,
products_new = $3,
products_updated = $4,
snapshots_created = $5
WHERE id = $1
`, [
crawlRunId,
hydrateResult.productsUpserted,
hydrateResult.productsNew,
hydrateResult.productsUpdated,
hydrateResult.snapshotsCreated,
]);
// Update dispensary last_crawled_at
await pool.query(`
UPDATE dispensaries
@@ -111,21 +53,17 @@ export async function handleProductResync(ctx: TaskContext): Promise<TaskResult>
WHERE id = $1
`, [dispensaryId]);
console.log(`[ProductResync] Completed ${dispensary.name}: ${hydrateResult.productsUpserted} products, ${hydrateResult.snapshotsCreated} snapshots`);
console.log(`[ProductResync] Completed ${dispensary.name}`);
return {
success: true,
productsProcessed: hydrateResult.productsUpserted,
productsNew: hydrateResult.productsNew,
productsUpdated: hydrateResult.productsUpdated,
snapshotsCreated: hydrateResult.snapshotsCreated,
brandsCreated: hydrateResult.brandsCreated,
};
} catch (error: any) {
console.error(`[ProductResync] Error for dispensary ${dispensaryId}:`, error.message);
} catch (error: unknown) {
const errorMessage = error instanceof Error ? error.message : 'Unknown error';
console.error(`[ProductResync] Error for dispensary ${dispensaryId}:`, errorMessage);
return {
success: false,
error: error.message,
error: errorMessage,
};
}
}

View File

@@ -1,16 +1,16 @@
/**
* Store Discovery Handler
*
* Discovers new stores on a platform (e.g., Dutchie) by crawling
* location APIs and adding them to dutchie_discovery_locations.
* Discovers new stores by crawling location APIs and adding them
* to discovery_locations table.
*/
import { TaskContext, TaskResult } from '../task-worker';
import { DiscoveryCrawler } from '../../discovery/discovery-crawler';
import { discoverState } from '../../discovery';
export async function handleStoreDiscovery(ctx: TaskContext): Promise<TaskResult> {
const { pool, task } = ctx;
const platform = task.platform || 'dutchie';
const platform = task.platform || 'default';
console.log(`[StoreDiscovery] Starting discovery for platform: ${platform}`);
@@ -29,8 +29,6 @@ export async function handleStoreDiscovery(ctx: TaskContext): Promise<TaskResult
let totalPromoted = 0;
// Run discovery for each state
const crawler = new DiscoveryCrawler(pool);
for (const stateCode of stateCodes) {
// Heartbeat before each state
await ctx.heartbeat();
@@ -38,12 +36,13 @@ export async function handleStoreDiscovery(ctx: TaskContext): Promise<TaskResult
console.log(`[StoreDiscovery] Discovering stores in ${stateCode}...`);
try {
const result = await crawler.discoverState(stateCode);
totalDiscovered += result.locationsDiscovered || 0;
totalPromoted += result.locationsPromoted || 0;
console.log(`[StoreDiscovery] ${stateCode}: discovered ${result.locationsDiscovered}, promoted ${result.locationsPromoted}`);
} catch (error: any) {
console.error(`[StoreDiscovery] Error discovering ${stateCode}:`, error.message);
const result = await discoverState(pool, stateCode);
totalDiscovered += result.totalLocationsFound || 0;
totalPromoted += result.totalLocationsUpserted || 0;
console.log(`[StoreDiscovery] ${stateCode}: found ${result.totalLocationsFound}, upserted ${result.totalLocationsUpserted}`);
} catch (error: unknown) {
const errorMessage = error instanceof Error ? error.message : 'Unknown error';
console.error(`[StoreDiscovery] Error discovering ${stateCode}:`, errorMessage);
// Continue with other states
}
}
@@ -55,13 +54,13 @@ export async function handleStoreDiscovery(ctx: TaskContext): Promise<TaskResult
storesDiscovered: totalDiscovered,
storesPromoted: totalPromoted,
statesProcessed: stateCodes.length,
newStoreIds: [], // Would be populated with actual new store IDs for chaining
};
} catch (error: any) {
console.error(`[StoreDiscovery] Error:`, error.message);
} catch (error: unknown) {
const errorMessage = error instanceof Error ? error.message : 'Unknown error';
console.error(`[StoreDiscovery] Error:`, errorMessage);
return {
success: false,
error: error.message,
error: errorMessage,
};
}
}

View File

@@ -17,7 +17,7 @@
import { Pool } from 'pg';
import { v4 as uuidv4 } from 'uuid';
import { taskService, TaskRole, WorkerTask } from './task-service';
import { pool } from '../db/pool';
import { getPool } from '../db/pool';
// Task handlers by role
import { handleProductResync } from './handlers/product-resync';
@@ -64,7 +64,7 @@ export class TaskWorker {
private currentTask: WorkerTask | null = null;
constructor(role: TaskRole, workerId?: string) {
this.pool = pool;
this.pool = getPool();
this.role = role;
this.workerId = workerId || `worker-${role}-${uuidv4().slice(0, 8)}`;
}