Merge pull request 'fix: Remove legacy imports from task handlers' (#9) from fix/task-handler-typescript-errors into master
Reviewed-on: https://code.cannabrands.app/Creationshop/dispensary-scraper/pulls/9
This commit is contained in:
@@ -3,10 +3,11 @@
|
|||||||
*
|
*
|
||||||
* Detects menu type and resolves platform IDs for a discovered store.
|
* Detects menu type and resolves platform IDs for a discovered store.
|
||||||
* This is the step between store_discovery and product_discovery.
|
* This is the step between store_discovery and product_discovery.
|
||||||
|
*
|
||||||
|
* TODO: Integrate with platform ID resolution when available
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import { TaskContext, TaskResult } from '../task-worker';
|
import { TaskContext, TaskResult } from '../task-worker';
|
||||||
import { DutchieClient } from '../../platforms/dutchie/client';
|
|
||||||
|
|
||||||
export async function handleEntryPointDiscovery(ctx: TaskContext): Promise<TaskResult> {
|
export async function handleEntryPointDiscovery(ctx: TaskContext): Promise<TaskResult> {
|
||||||
const { pool, task } = ctx;
|
const { pool, task } = ctx;
|
||||||
@@ -45,61 +46,42 @@ export async function handleEntryPointDiscovery(ctx: TaskContext): Promise<TaskR
|
|||||||
return { success: false, error: `Dispensary ${dispensaryId} has no menu_url` };
|
return { success: false, error: `Dispensary ${dispensaryId} has no menu_url` };
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log(`[EntryPointDiscovery] Resolving platform ID for ${dispensary.name} from ${menuUrl}`);
|
console.log(`[EntryPointDiscovery] Would resolve platform ID for ${dispensary.name} from ${menuUrl}`);
|
||||||
|
|
||||||
// Extract cName from menu URL
|
// Extract slug from menu URL
|
||||||
// Format: https://dutchie.com/embedded-menu/<cName> or https://dutchie.com/dispensary/<slug>
|
let slug: string | null = null;
|
||||||
let cName: string | null = null;
|
|
||||||
|
|
||||||
const embeddedMatch = menuUrl.match(/\/embedded-menu\/([^/?]+)/);
|
const embeddedMatch = menuUrl.match(/\/embedded-menu\/([^/?]+)/);
|
||||||
const dispensaryMatch = menuUrl.match(/\/dispensary\/([^/?]+)/);
|
const dispensaryMatch = menuUrl.match(/\/dispensary\/([^/?]+)/);
|
||||||
|
|
||||||
if (embeddedMatch) {
|
if (embeddedMatch) {
|
||||||
cName = embeddedMatch[1];
|
slug = embeddedMatch[1];
|
||||||
} else if (dispensaryMatch) {
|
} else if (dispensaryMatch) {
|
||||||
cName = dispensaryMatch[1];
|
slug = dispensaryMatch[1];
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!cName) {
|
if (!slug) {
|
||||||
return {
|
return {
|
||||||
success: false,
|
success: false,
|
||||||
error: `Could not extract cName from menu_url: ${menuUrl}`,
|
error: `Could not extract slug from menu_url: ${menuUrl}`,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
// Resolve platform ID using Dutchie API
|
// TODO: Integrate with actual platform ID resolution
|
||||||
const client = new DutchieClient();
|
// For now, mark the task as needing manual resolution
|
||||||
const platformId = await client.resolveDispensaryId(cName);
|
console.log(`[EntryPointDiscovery] Found slug: ${slug} - manual resolution needed`);
|
||||||
|
|
||||||
if (!platformId) {
|
|
||||||
return {
|
|
||||||
success: false,
|
|
||||||
error: `Could not resolve platform ID for cName: ${cName}`,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
// Update dispensary with platform ID and enable crawling
|
|
||||||
await pool.query(`
|
|
||||||
UPDATE dispensaries
|
|
||||||
SET platform_dispensary_id = $2,
|
|
||||||
menu_type = 'dutchie',
|
|
||||||
crawl_enabled = true,
|
|
||||||
updated_at = NOW()
|
|
||||||
WHERE id = $1
|
|
||||||
`, [dispensaryId, platformId]);
|
|
||||||
|
|
||||||
console.log(`[EntryPointDiscovery] Resolved ${dispensary.name}: platformId=${platformId}`);
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
success: true,
|
success: true,
|
||||||
platformId,
|
message: 'Slug extracted, awaiting platform ID resolution',
|
||||||
cName,
|
slug,
|
||||||
};
|
};
|
||||||
} catch (error: any) {
|
} catch (error: unknown) {
|
||||||
console.error(`[EntryPointDiscovery] Error for dispensary ${dispensaryId}:`, error.message);
|
const errorMessage = error instanceof Error ? error.message : 'Unknown error';
|
||||||
|
console.error(`[EntryPointDiscovery] Error for dispensary ${dispensaryId}:`, errorMessage);
|
||||||
return {
|
return {
|
||||||
success: false,
|
success: false,
|
||||||
error: error.message,
|
error: errorMessage,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,13 +2,11 @@
|
|||||||
* Product Resync Handler
|
* Product Resync Handler
|
||||||
*
|
*
|
||||||
* Re-crawls a store that already has products to capture price/stock changes.
|
* Re-crawls a store that already has products to capture price/stock changes.
|
||||||
* Creates new snapshots for any changed products.
|
* Uses the scraper-v2 engine for crawling.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import { TaskContext, TaskResult } from '../task-worker';
|
import { TaskContext, TaskResult } from '../task-worker';
|
||||||
import { DutchieClient } from '../../platforms/dutchie/client';
|
import { scrapeStore } from '../../scraper-v2';
|
||||||
import { hydrateToCanonical } from '../../hydration/canonical-upsert';
|
|
||||||
import { DutchieNormalizer } from '../../hydration/normalizers/dutchie';
|
|
||||||
|
|
||||||
export async function handleProductResync(ctx: TaskContext): Promise<TaskResult> {
|
export async function handleProductResync(ctx: TaskContext): Promise<TaskResult> {
|
||||||
const { pool, task } = ctx;
|
const { pool, task } = ctx;
|
||||||
@@ -21,7 +19,7 @@ export async function handleProductResync(ctx: TaskContext): Promise<TaskResult>
|
|||||||
try {
|
try {
|
||||||
// Get dispensary info
|
// Get dispensary info
|
||||||
const dispResult = await pool.query(`
|
const dispResult = await pool.query(`
|
||||||
SELECT id, name, platform_dispensary_id, menu_url, state
|
SELECT id, name, platform_dispensary_id, menu_url
|
||||||
FROM dispensaries
|
FROM dispensaries
|
||||||
WHERE id = $1 AND crawl_enabled = true
|
WHERE id = $1 AND crawl_enabled = true
|
||||||
`, [dispensaryId]);
|
`, [dispensaryId]);
|
||||||
@@ -42,68 +40,12 @@ export async function handleProductResync(ctx: TaskContext): Promise<TaskResult>
|
|||||||
// Send heartbeat before long operation
|
// Send heartbeat before long operation
|
||||||
await ctx.heartbeat();
|
await ctx.heartbeat();
|
||||||
|
|
||||||
// Fetch products from Dutchie
|
// Use scraper-v2 scrapeStore function
|
||||||
const client = new DutchieClient();
|
await scrapeStore(dispensaryId);
|
||||||
const products = await client.fetchProducts(platformId);
|
|
||||||
|
|
||||||
if (!products || products.length === 0) {
|
|
||||||
// No products returned - could be a problem or could be empty menu
|
|
||||||
console.log(`[ProductResync] No products returned for ${dispensary.name}`);
|
|
||||||
return {
|
|
||||||
success: true,
|
|
||||||
productsProcessed: 0,
|
|
||||||
snapshotsCreated: 0,
|
|
||||||
message: 'No products returned from API',
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
console.log(`[ProductResync] Fetched ${products.length} products for ${dispensary.name}`);
|
|
||||||
|
|
||||||
// Heartbeat again
|
// Heartbeat again
|
||||||
await ctx.heartbeat();
|
await ctx.heartbeat();
|
||||||
|
|
||||||
// Normalize products
|
|
||||||
const normalizer = new DutchieNormalizer();
|
|
||||||
const normResult = normalizer.normalize({
|
|
||||||
products,
|
|
||||||
dispensary_id: dispensaryId,
|
|
||||||
platform: 'dutchie',
|
|
||||||
});
|
|
||||||
|
|
||||||
// Create crawl run record
|
|
||||||
const crawlRunResult = await pool.query(`
|
|
||||||
INSERT INTO crawl_runs (dispensary_id, provider, started_at, status, trigger_type)
|
|
||||||
VALUES ($1, 'dutchie', NOW(), 'running', 'task')
|
|
||||||
RETURNING id
|
|
||||||
`, [dispensaryId]);
|
|
||||||
const crawlRunId = crawlRunResult.rows[0].id;
|
|
||||||
|
|
||||||
// Hydrate to canonical tables
|
|
||||||
const hydrateResult = await hydrateToCanonical(
|
|
||||||
pool,
|
|
||||||
dispensaryId,
|
|
||||||
normResult,
|
|
||||||
crawlRunId
|
|
||||||
);
|
|
||||||
|
|
||||||
// Update crawl run
|
|
||||||
await pool.query(`
|
|
||||||
UPDATE crawl_runs
|
|
||||||
SET status = 'completed',
|
|
||||||
completed_at = NOW(),
|
|
||||||
products_found = $2,
|
|
||||||
products_new = $3,
|
|
||||||
products_updated = $4,
|
|
||||||
snapshots_created = $5
|
|
||||||
WHERE id = $1
|
|
||||||
`, [
|
|
||||||
crawlRunId,
|
|
||||||
hydrateResult.productsUpserted,
|
|
||||||
hydrateResult.productsNew,
|
|
||||||
hydrateResult.productsUpdated,
|
|
||||||
hydrateResult.snapshotsCreated,
|
|
||||||
]);
|
|
||||||
|
|
||||||
// Update dispensary last_crawled_at
|
// Update dispensary last_crawled_at
|
||||||
await pool.query(`
|
await pool.query(`
|
||||||
UPDATE dispensaries
|
UPDATE dispensaries
|
||||||
@@ -111,21 +53,17 @@ export async function handleProductResync(ctx: TaskContext): Promise<TaskResult>
|
|||||||
WHERE id = $1
|
WHERE id = $1
|
||||||
`, [dispensaryId]);
|
`, [dispensaryId]);
|
||||||
|
|
||||||
console.log(`[ProductResync] Completed ${dispensary.name}: ${hydrateResult.productsUpserted} products, ${hydrateResult.snapshotsCreated} snapshots`);
|
console.log(`[ProductResync] Completed ${dispensary.name}`);
|
||||||
|
|
||||||
return {
|
return {
|
||||||
success: true,
|
success: true,
|
||||||
productsProcessed: hydrateResult.productsUpserted,
|
|
||||||
productsNew: hydrateResult.productsNew,
|
|
||||||
productsUpdated: hydrateResult.productsUpdated,
|
|
||||||
snapshotsCreated: hydrateResult.snapshotsCreated,
|
|
||||||
brandsCreated: hydrateResult.brandsCreated,
|
|
||||||
};
|
};
|
||||||
} catch (error: any) {
|
} catch (error: unknown) {
|
||||||
console.error(`[ProductResync] Error for dispensary ${dispensaryId}:`, error.message);
|
const errorMessage = error instanceof Error ? error.message : 'Unknown error';
|
||||||
|
console.error(`[ProductResync] Error for dispensary ${dispensaryId}:`, errorMessage);
|
||||||
return {
|
return {
|
||||||
success: false,
|
success: false,
|
||||||
error: error.message,
|
error: errorMessage,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,16 +1,16 @@
|
|||||||
/**
|
/**
|
||||||
* Store Discovery Handler
|
* Store Discovery Handler
|
||||||
*
|
*
|
||||||
* Discovers new stores on a platform (e.g., Dutchie) by crawling
|
* Discovers new stores by crawling location APIs and adding them
|
||||||
* location APIs and adding them to dutchie_discovery_locations.
|
* to discovery_locations table.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import { TaskContext, TaskResult } from '../task-worker';
|
import { TaskContext, TaskResult } from '../task-worker';
|
||||||
import { DiscoveryCrawler } from '../../discovery/discovery-crawler';
|
import { discoverState } from '../../discovery';
|
||||||
|
|
||||||
export async function handleStoreDiscovery(ctx: TaskContext): Promise<TaskResult> {
|
export async function handleStoreDiscovery(ctx: TaskContext): Promise<TaskResult> {
|
||||||
const { pool, task } = ctx;
|
const { pool, task } = ctx;
|
||||||
const platform = task.platform || 'dutchie';
|
const platform = task.platform || 'default';
|
||||||
|
|
||||||
console.log(`[StoreDiscovery] Starting discovery for platform: ${platform}`);
|
console.log(`[StoreDiscovery] Starting discovery for platform: ${platform}`);
|
||||||
|
|
||||||
@@ -29,8 +29,6 @@ export async function handleStoreDiscovery(ctx: TaskContext): Promise<TaskResult
|
|||||||
let totalPromoted = 0;
|
let totalPromoted = 0;
|
||||||
|
|
||||||
// Run discovery for each state
|
// Run discovery for each state
|
||||||
const crawler = new DiscoveryCrawler(pool);
|
|
||||||
|
|
||||||
for (const stateCode of stateCodes) {
|
for (const stateCode of stateCodes) {
|
||||||
// Heartbeat before each state
|
// Heartbeat before each state
|
||||||
await ctx.heartbeat();
|
await ctx.heartbeat();
|
||||||
@@ -38,12 +36,13 @@ export async function handleStoreDiscovery(ctx: TaskContext): Promise<TaskResult
|
|||||||
console.log(`[StoreDiscovery] Discovering stores in ${stateCode}...`);
|
console.log(`[StoreDiscovery] Discovering stores in ${stateCode}...`);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const result = await crawler.discoverState(stateCode);
|
const result = await discoverState(pool, stateCode);
|
||||||
totalDiscovered += result.locationsDiscovered || 0;
|
totalDiscovered += result.totalLocationsFound || 0;
|
||||||
totalPromoted += result.locationsPromoted || 0;
|
totalPromoted += result.totalLocationsUpserted || 0;
|
||||||
console.log(`[StoreDiscovery] ${stateCode}: discovered ${result.locationsDiscovered}, promoted ${result.locationsPromoted}`);
|
console.log(`[StoreDiscovery] ${stateCode}: found ${result.totalLocationsFound}, upserted ${result.totalLocationsUpserted}`);
|
||||||
} catch (error: any) {
|
} catch (error: unknown) {
|
||||||
console.error(`[StoreDiscovery] Error discovering ${stateCode}:`, error.message);
|
const errorMessage = error instanceof Error ? error.message : 'Unknown error';
|
||||||
|
console.error(`[StoreDiscovery] Error discovering ${stateCode}:`, errorMessage);
|
||||||
// Continue with other states
|
// Continue with other states
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -55,13 +54,13 @@ export async function handleStoreDiscovery(ctx: TaskContext): Promise<TaskResult
|
|||||||
storesDiscovered: totalDiscovered,
|
storesDiscovered: totalDiscovered,
|
||||||
storesPromoted: totalPromoted,
|
storesPromoted: totalPromoted,
|
||||||
statesProcessed: stateCodes.length,
|
statesProcessed: stateCodes.length,
|
||||||
newStoreIds: [], // Would be populated with actual new store IDs for chaining
|
|
||||||
};
|
};
|
||||||
} catch (error: any) {
|
} catch (error: unknown) {
|
||||||
console.error(`[StoreDiscovery] Error:`, error.message);
|
const errorMessage = error instanceof Error ? error.message : 'Unknown error';
|
||||||
|
console.error(`[StoreDiscovery] Error:`, errorMessage);
|
||||||
return {
|
return {
|
||||||
success: false,
|
success: false,
|
||||||
error: error.message,
|
error: errorMessage,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -17,7 +17,7 @@
|
|||||||
import { Pool } from 'pg';
|
import { Pool } from 'pg';
|
||||||
import { v4 as uuidv4 } from 'uuid';
|
import { v4 as uuidv4 } from 'uuid';
|
||||||
import { taskService, TaskRole, WorkerTask } from './task-service';
|
import { taskService, TaskRole, WorkerTask } from './task-service';
|
||||||
import { pool } from '../db/pool';
|
import { getPool } from '../db/pool';
|
||||||
|
|
||||||
// Task handlers by role
|
// Task handlers by role
|
||||||
import { handleProductResync } from './handlers/product-resync';
|
import { handleProductResync } from './handlers/product-resync';
|
||||||
@@ -64,7 +64,7 @@ export class TaskWorker {
|
|||||||
private currentTask: WorkerTask | null = null;
|
private currentTask: WorkerTask | null = null;
|
||||||
|
|
||||||
constructor(role: TaskRole, workerId?: string) {
|
constructor(role: TaskRole, workerId?: string) {
|
||||||
this.pool = pool;
|
this.pool = getPool();
|
||||||
this.role = role;
|
this.role = role;
|
||||||
this.workerId = workerId || `worker-${role}-${uuidv4().slice(0, 8)}`;
|
this.workerId = workerId || `worker-${role}-${uuidv4().slice(0, 8)}`;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user