feat: Add v2 architecture with multi-state support and orchestrator services
Major additions: - Multi-state expansion: states table, StateSelector, NationalDashboard, StateHeatmap, CrossStateCompare - Orchestrator services: trace service, error taxonomy, retry manager, proxy rotator - Discovery system: dutchie discovery service, geo validation, city seeding scripts - Analytics infrastructure: analytics v2 routes, brand/pricing/stores intelligence pages - Local development: setup-local.sh starts all 5 services (postgres, backend, cannaiq, findadispo, findagram) - Migrations 037-056: crawler profiles, states, analytics indexes, worker metadata Frontend pages added: - Discovery, ChainsDashboard, IntelligenceBrands, IntelligencePricing, IntelligenceStores - StateHeatmap, CrossStateCompare, SyncInfoPanel Components added: - StateSelector, OrchestratorTraceModal, WorkflowStepper 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
435
backend/src/hydration/canonical-upsert.ts
Normal file
435
backend/src/hydration/canonical-upsert.ts
Normal file
@@ -0,0 +1,435 @@
|
||||
/**
|
||||
* Canonical Upsert Functions
|
||||
*
|
||||
* Upserts normalized data into canonical tables:
|
||||
* - store_products
|
||||
* - store_product_snapshots
|
||||
* - brands
|
||||
* - categories (future)
|
||||
*/
|
||||
|
||||
import { Pool, PoolClient } from 'pg';
|
||||
import {
|
||||
NormalizedProduct,
|
||||
NormalizedPricing,
|
||||
NormalizedAvailability,
|
||||
NormalizedBrand,
|
||||
NormalizationResult,
|
||||
} from './types';
|
||||
|
||||
// Number of products handled per batch by the upsert and snapshot functions
// in this module (one transaction / one bulk INSERT per batch).
const BATCH_SIZE = 100;
|
||||
|
||||
// ============================================================
|
||||
// PRODUCT UPSERTS
|
||||
// ============================================================
|
||||
|
||||
/** Row counts reported by `upsertStoreProducts`. */
export interface UpsertProductsResult {
  /** Total rows written, i.e. `new + updated`. */
  upserted: number;
  /** Rows that were freshly inserted. */
  new: number;
  /** Rows that already existed and were overwritten. */
  updated: number;
}
|
||||
|
||||
/**
 * Upsert products into the `store_products` table.
 *
 * Products are processed in batches of `BATCH_SIZE`. Each batch checks out its
 * own client and runs inside one transaction, so a failure rolls back only the
 * batch in flight (batches committed earlier stay committed) and is rethrown.
 *
 * Pricing and availability are looked up by `externalProductId`. Price values
 * are divided by 100 before being stored; a missing or zero value is stored as
 * NULL. A product without an availability entry is written as in stock with
 * stock status `'unknown'`.
 *
 * @param pool - Postgres pool; one client is checked out per batch.
 * @param products - Normalized products to write.
 * @param pricing - Pricing keyed by external product id.
 * @param availability - Availability keyed by external product id.
 * @param options - With `dryRun: true` the batches are only logged and no
 *   query is issued.
 * @returns Counts of inserted vs. updated rows (all zero for a dry run or an
 *   empty input).
 * @throws The error of the failing query. A ROLLBACK is attempted first; if
 *   the ROLLBACK itself fails that secondary error is logged, never thrown, so
 *   the root cause is not masked.
 */
export async function upsertStoreProducts(
  pool: Pool,
  products: NormalizedProduct[],
  pricing: Map<string, NormalizedPricing>,
  availability: Map<string, NormalizedAvailability>,
  options: { dryRun?: boolean } = {}
): Promise<UpsertProductsResult> {
  if (products.length === 0) {
    return { upserted: 0, new: 0, updated: 0 };
  }

  const { dryRun = false } = options;

  // Stored price = raw value / 100. Falsy inputs (missing, null, 0) become NULL.
  const toPrice = (raw: number | null | undefined): number | null =>
    raw ? raw / 100 : null;

  let newCount = 0;
  let updatedCount = 0;

  for (let i = 0; i < products.length; i += BATCH_SIZE) {
    const batch = products.slice(i, i + BATCH_SIZE);

    if (dryRun) {
      console.log(`[DryRun] Would upsert ${batch.length} products`);
      continue;
    }

    const client = await pool.connect();
    try {
      await client.query('BEGIN');

      for (const product of batch) {
        const productPricing = pricing.get(product.externalProductId);
        const productAvailability = availability.get(product.externalProductId);

        const result = await client.query(
          `INSERT INTO store_products (
            dispensary_id, provider, provider_product_id, provider_brand_id,
            name, brand_name, category, subcategory,
            price_rec, price_med, price_rec_special, price_med_special,
            is_on_special, discount_percent,
            is_in_stock, stock_status,
            thc_percent, cbd_percent,
            image_url,
            first_seen_at, last_seen_at, updated_at
          ) VALUES (
            $1, $2, $3, $4,
            $5, $6, $7, $8,
            $9, $10, $11, $12,
            $13, $14,
            $15, $16,
            $17, $18,
            $19,
            NOW(), NOW(), NOW()
          )
          ON CONFLICT (dispensary_id, provider, provider_product_id)
          DO UPDATE SET
            name = EXCLUDED.name,
            brand_name = EXCLUDED.brand_name,
            category = EXCLUDED.category,
            subcategory = EXCLUDED.subcategory,
            price_rec = EXCLUDED.price_rec,
            price_med = EXCLUDED.price_med,
            price_rec_special = EXCLUDED.price_rec_special,
            price_med_special = EXCLUDED.price_med_special,
            is_on_special = EXCLUDED.is_on_special,
            discount_percent = EXCLUDED.discount_percent,
            is_in_stock = EXCLUDED.is_in_stock,
            stock_status = EXCLUDED.stock_status,
            thc_percent = EXCLUDED.thc_percent,
            cbd_percent = EXCLUDED.cbd_percent,
            image_url = EXCLUDED.image_url,
            last_seen_at = NOW(),
            updated_at = NOW()
          RETURNING (xmax = 0) as is_new`,
          [
            product.dispensaryId,
            product.platform,
            product.externalProductId,
            product.brandId,
            product.name,
            product.brandName,
            product.category,
            product.subcategory,
            toPrice(productPricing?.priceRec),
            toPrice(productPricing?.priceMed),
            toPrice(productPricing?.priceRecSpecial),
            toPrice(productPricing?.priceMedSpecial),
            productPricing?.isOnSpecial || false,
            productPricing?.discountPercent,
            productAvailability?.inStock ?? true,
            productAvailability?.stockStatus || 'unknown',
            product.thcPercent,
            product.cbdPercent,
            product.primaryImageUrl,
          ]
        );

        // `is_new` comes from `RETURNING (xmax = 0)`: true for a fresh insert,
        // false when the ON CONFLICT branch updated an existing row.
        if (result.rows[0]?.is_new) {
          newCount++;
        } else {
          updatedCount++;
        }
      }

      await client.query('COMMIT');
    } catch (error) {
      try {
        await client.query('ROLLBACK');
      } catch (rollbackError) {
        // Keep the original failure as the thrown error; only report this one.
        console.error('[upsertStoreProducts] ROLLBACK failed:', rollbackError);
      }
      throw error;
    } finally {
      client.release();
    }
  }

  return {
    upserted: newCount + updatedCount,
    new: newCount,
    updated: updatedCount,
  };
}
|
||||
|
||||
// ============================================================
|
||||
// SNAPSHOT CREATION
|
||||
// ============================================================
|
||||
|
||||
/** Result reported by `createStoreProductSnapshots`. */
export interface CreateSnapshotsResult {
  /** Number of snapshot rows inserted (or that would be inserted in a dry run). */
  created: number;
}
|
||||
|
||||
/**
 * Insert one `store_product_snapshots` row per product of a crawl.
 *
 * Rows are written with one multi-row INSERT per batch of `BATCH_SIZE`
 * products, issued directly on the pool (no surrounding transaction, so
 * batches inserted before a failure remain in place).
 *
 * Pricing and availability are looked up by `externalProductId`. Price values
 * are divided by 100; a missing or zero value is stored as NULL. A product
 * without an availability entry is recorded as in stock with status
 * `'unknown'`. `rawProduct` is stored JSON-serialized in `raw_data`.
 *
 * @param pool - Postgres pool used for the bulk inserts.
 * @param dispensaryId - Dispensary the snapshots belong to.
 * @param products - Normalized products seen in the crawl.
 * @param pricing - Pricing keyed by external product id.
 * @param availability - Availability keyed by external product id.
 * @param crawlRunId - Crawl run to attach the snapshots to, or null.
 * @param options - With `dryRun: true` nothing is written; the would-be row
 *   count is logged and returned.
 * @returns The number of snapshot rows created.
 */
export async function createStoreProductSnapshots(
  pool: Pool,
  dispensaryId: number,
  products: NormalizedProduct[],
  pricing: Map<string, NormalizedPricing>,
  availability: Map<string, NormalizedAvailability>,
  crawlRunId: number | null,
  options: { dryRun?: boolean } = {}
): Promise<CreateSnapshotsResult> {
  if (products.length === 0) {
    return { created: 0 };
  }

  const { dryRun = false } = options;

  if (dryRun) {
    console.log(`[DryRun] Would create ${products.length} snapshots`);
    return { created: products.length };
  }

  // Single source of truth for the INSERT column list. The placeholder width
  // is derived from it, and each row below must list its values in this order.
  const columns = [
    'dispensary_id', 'provider', 'provider_product_id', 'crawl_run_id',
    'captured_at',
    'name', 'brand_name', 'category', 'subcategory',
    'price_rec', 'price_med', 'price_rec_special', 'price_med_special',
    'is_on_special', 'discount_percent',
    'is_in_stock', 'stock_quantity', 'stock_status',
    'thc_percent', 'cbd_percent',
    'image_url', 'raw_data',
  ];
  const width = columns.length;

  // Stored price = raw value / 100. Falsy inputs (missing, null, 0) become NULL.
  const toPrice = (raw: number | null | undefined): number | null =>
    raw ? raw / 100 : null;

  let created = 0;

  for (let i = 0; i < products.length; i += BATCH_SIZE) {
    const batch = products.slice(i, i + BATCH_SIZE);

    const rows: unknown[][] = batch.map((product) => {
      const productPricing = pricing.get(product.externalProductId);
      const productAvailability = availability.get(product.externalProductId);

      const row: unknown[] = [
        dispensaryId,
        product.platform,
        product.externalProductId,
        crawlRunId,
        new Date(), // captured_at
        product.name,
        product.brandName,
        product.category,
        product.subcategory,
        toPrice(productPricing?.priceRec),
        toPrice(productPricing?.priceMed),
        toPrice(productPricing?.priceRecSpecial),
        toPrice(productPricing?.priceMedSpecial),
        productPricing?.isOnSpecial || false,
        productPricing?.discountPercent,
        productAvailability?.inStock ?? true,
        productAvailability?.quantity,
        productAvailability?.stockStatus || 'unknown',
        product.thcPercent,
        product.cbdPercent,
        product.primaryImageUrl,
        JSON.stringify(product.rawProduct),
      ];

      // Guards against the column list and the row literal drifting apart.
      if (row.length !== width) {
        throw new Error(
          `snapshot row has ${row.length} values but ${width} columns are declared`
        );
      }
      return row;
    });

    // One "($n, $n+1, ...)" group per row, numbered consecutively across rows.
    const placeholders = rows
      .map((_, rowIdx) => {
        const offset = rowIdx * width;
        const group = Array.from({ length: width }, (_unused, j) => `$${offset + j + 1}`);
        return `(${group.join(', ')})`;
      })
      .join(', ');

    await pool.query(
      `INSERT INTO store_product_snapshots (
        ${columns.join(', ')}
      ) VALUES ${placeholders}`,
      rows.flat()
    );

    created += batch.length;
  }

  return { created };
}
|
||||
|
||||
// ============================================================
|
||||
// DISCONTINUED PRODUCTS
|
||||
// ============================================================
|
||||
|
||||
/**
 * Mark products as discontinued when they were not part of the current crawl.
 *
 * Candidates are the dispensary's rows in `store_products` for the given
 * provider that are currently `is_in_stock = TRUE`. Every candidate whose
 * `provider_product_id` is not in `currentProductIds` is:
 *   1. updated to `is_in_stock = FALSE`, `stock_status = 'discontinued'`, and
 *   2. given a `'discontinued'` row in `store_product_snapshots`.
 *
 * Both writes are set-based (`= ANY($3)`), so the number of round trips does
 * not grow with the number of discontinued products. They are issued as two
 * separate statements on the pool, not inside a transaction.
 *
 * @param pool - Postgres pool used for all queries.
 * @param dispensaryId - Dispensary whose products are checked.
 * @param currentProductIds - External product ids seen in the current crawl.
 *   An empty set means every in-stock product of this provider is discontinued.
 * @param platform - Provider value to match in `store_products.provider`.
 * @param crawlRunId - Crawl run recorded on the snapshot rows, or null.
 * @param options - With `dryRun: true` only the lookup query runs; the count
 *   is logged and returned without writing.
 * @returns The number of products marked (or that would be marked) discontinued.
 */
export async function markDiscontinuedProducts(
  pool: Pool,
  dispensaryId: number,
  currentProductIds: Set<string>,
  platform: string,
  crawlRunId: number | null,
  options: { dryRun?: boolean } = {}
): Promise<number> {
  const { dryRun = false } = options;

  // In-stock products currently known for this dispensary/provider.
  const result = await pool.query(
    `SELECT provider_product_id FROM store_products
     WHERE dispensary_id = $1 AND provider = $2 AND is_in_stock = TRUE`,
    [dispensaryId, platform]
  );

  const discontinuedIds: string[] = result.rows
    .map((row: { provider_product_id: string }) => row.provider_product_id)
    .filter((id: string) => !currentProductIds.has(id));

  if (discontinuedIds.length === 0) {
    return 0;
  }

  if (dryRun) {
    console.log(`[DryRun] Would mark ${discontinuedIds.length} products as discontinued`);
    return discontinuedIds.length;
  }

  // Flip the canonical rows to out of stock / discontinued.
  await pool.query(
    `UPDATE store_products
     SET is_in_stock = FALSE,
         stock_status = 'discontinued',
         updated_at = NOW()
     WHERE dispensary_id = $1
       AND provider = $2
       AND provider_product_id = ANY($3)`,
    [dispensaryId, platform, discontinuedIds]
  );

  // Record one 'discontinued' snapshot per affected product in a single
  // statement, using the same id array as the UPDATE above.
  await pool.query(
    `INSERT INTO store_product_snapshots (
       dispensary_id, provider, provider_product_id, crawl_run_id,
       captured_at, is_in_stock, stock_status
     )
     SELECT
       dispensary_id, provider, provider_product_id, $4,
       NOW(), FALSE, 'discontinued'
     FROM store_products
     WHERE dispensary_id = $1
       AND provider = $2
       AND provider_product_id = ANY($3)`,
    [dispensaryId, platform, discontinuedIds, crawlRunId]
  );

  return discontinuedIds.length;
}
|
||||
|
||||
// ============================================================
|
||||
// BRAND UPSERTS
|
||||
// ============================================================
|
||||
|
||||
/** Counts reported by `upsertBrands`. */
export interface UpsertBrandsResult {
  /** Number of brands processed (the length of the input list). */
  upserted: number;
  /** Number of brands that were freshly inserted. */
  new: number;
}
|
||||
|
||||
/**
 * Upsert brands into the `brands` table, keyed by `slug`.
 *
 * Each brand is written with its own INSERT on the pool. What happens when the
 * slug already exists depends on `skipIfExists`:
 *   - `true` (default): `ON CONFLICT DO NOTHING`, the existing row is untouched;
 *   - `false`: `logo_url` is replaced when the incoming value is non-null and
 *     `updated_at` is refreshed.
 *
 * @param pool - Postgres pool used for the inserts.
 * @param brands - Normalized brands to write.
 * @param options - `dryRun: true` logs and writes nothing; `skipIfExists`
 *   selects the conflict behaviour described above.
 * @returns `upserted` is always the number of brands passed in; `new` counts
 *   rows reported as freshly inserted (always 0 for a dry run).
 */
export async function upsertBrands(
  pool: Pool,
  brands: NormalizedBrand[],
  options: { dryRun?: boolean; skipIfExists?: boolean } = {}
): Promise<UpsertBrandsResult> {
  if (brands.length === 0) {
    return { upserted: 0, new: 0 };
  }

  const { dryRun = false, skipIfExists = true } = options;

  if (dryRun) {
    console.log(`[DryRun] Would upsert ${brands.length} brands`);
    return { upserted: brands.length, new: 0 };
  }

  // The statement text depends only on `skipIfExists`, so build it once
  // instead of once per brand.
  const conflictAction = skipIfExists
    ? 'NOTHING'
    : 'UPDATE SET logo_url = COALESCE(EXCLUDED.logo_url, brands.logo_url), updated_at = NOW()';
  const upsertSql = `INSERT INTO brands (name, slug, external_id, logo_url, created_at, updated_at)
       VALUES ($1, $2, $3, $4, NOW(), NOW())
       ON CONFLICT (slug) DO ${conflictAction}
       RETURNING (xmax = 0) as is_new`;

  let newCount = 0;

  for (const brand of brands) {
    const result = await pool.query(upsertSql, [
      brand.name,
      brand.slug,
      brand.externalBrandId,
      brand.logoUrl,
    ]);

    // With DO NOTHING a conflicting row returns no rows at all, so it is not
    // counted as new.
    if (result.rows[0]?.is_new) {
      newCount++;
    }
  }

  return {
    upserted: brands.length,
    new: newCount,
  };
}
|
||||
|
||||
// ============================================================
|
||||
// FULL HYDRATION
|
||||
// ============================================================
|
||||
|
||||
/** Aggregate counts reported by `hydrateToCanonical`. */
export interface HydratePayloadResult {
  /** Total product rows written (new + updated). */
  productsUpserted: number;
  /** Product rows that were freshly inserted. */
  productsNew: number;
  /** Product rows that already existed and were overwritten. */
  productsUpdated: number;
  /** Products marked as discontinued because they were absent from the crawl. */
  productsDiscontinued: number;
  /** Snapshot rows created for the products in the crawl. */
  snapshotsCreated: number;
  /** Brand rows that were freshly inserted. */
  brandsCreated: number;
}
|
||||
|
||||
/**
 * Hydrate a complete normalization result into the canonical tables.
 *
 * Runs, strictly in this order:
 *   1. `upsertBrands` for `normResult.brands`,
 *   2. `upsertStoreProducts` for the products with their pricing/availability,
 *   3. `createStoreProductSnapshots` for the same products,
 *   4. `markDiscontinuedProducts` for products missing from this crawl.
 *
 * The steps are awaited one after another and are not wrapped in a common
 * transaction; an error thrown by any step propagates to the caller.
 *
 * @param pool - Postgres pool handed to every step.
 * @param dispensaryId - Dispensary the crawl belongs to.
 * @param normResult - Normalized brands, products, pricing and availability.
 * @param crawlRunId - Crawl run recorded on snapshot rows, or null.
 * @param options - `dryRun` is forwarded unchanged to every step.
 * @returns The per-step counts combined into one result object.
 */
export async function hydrateToCanonical(
  pool: Pool,
  dispensaryId: number,
  normResult: NormalizationResult,
  crawlRunId: number | null,
  options: { dryRun?: boolean } = {}
): Promise<HydratePayloadResult> {
  const { dryRun = false } = options;
  const { brands, products, pricing, availability } = normResult;

  // 1. Upsert brands
  const brandResult = await upsertBrands(pool, brands, { dryRun });

  // 2. Upsert products
  const productResult = await upsertStoreProducts(pool, products, pricing, availability, {
    dryRun,
  });

  // 3. Create snapshots
  const snapshotResult = await createStoreProductSnapshots(
    pool,
    dispensaryId,
    products,
    pricing,
    availability,
    crawlRunId,
    { dryRun }
  );

  // 4. Mark discontinued products
  const currentProductIds = new Set(products.map((p) => p.externalProductId));
  // The provider is taken from the first product; 'dutchie' is the fallback.
  // NOTE(review): with an empty product list the id set is empty and the
  // platform falls back to 'dutchie', so every in-stock 'dutchie' product of
  // this dispensary is marked discontinued. Confirm callers never pass the
  // result of a failed or empty crawl.
  const platform = products[0]?.platform || 'dutchie';
  const discontinuedCount = await markDiscontinuedProducts(
    pool,
    dispensaryId,
    currentProductIds,
    platform,
    crawlRunId,
    { dryRun }
  );

  return {
    productsUpserted: productResult.upserted,
    productsNew: productResult.new,
    productsUpdated: productResult.updated,
    productsDiscontinued: discontinuedCount,
    snapshotsCreated: snapshotResult.created,
    brandsCreated: brandResult.new,
  };
}
|
||||
Reference in New Issue
Block a user