feat: Add v2 architecture with multi-state support and orchestrator services

Major additions:
- Multi-state expansion: states table, StateSelector, NationalDashboard, StateHeatmap, CrossStateCompare
- Orchestrator services: trace service, error taxonomy, retry manager, proxy rotator
- Discovery system: dutchie discovery service, geo validation, city seeding scripts
- Analytics infrastructure: analytics v2 routes, brand/pricing/stores intelligence pages
- Local development: setup-local.sh starts all 5 services (postgres, backend, cannaiq, findadispo, findagram)
- Migrations 037-056: crawler profiles, states, analytics indexes, worker metadata

Frontend pages added:
- Discovery, ChainsDashboard, IntelligenceBrands, IntelligencePricing, IntelligenceStores
- StateHeatmap, CrossStateCompare, SyncInfoPanel

Components added:
- StateSelector, OrchestratorTraceModal, WorkflowStepper

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Kelly
2025-12-07 11:30:57 -07:00
parent 8ac64ba077
commit b4a2fb7d03
248 changed files with 60714 additions and 666 deletions

View File

@@ -0,0 +1,435 @@
/**
* Canonical Upsert Functions
*
* Upserts normalized data into canonical tables:
* - store_products
* - store_product_snapshots
* - brands
* - categories (future)
*/
import { Pool, PoolClient } from 'pg';
import {
NormalizedProduct,
NormalizedPricing,
NormalizedAvailability,
NormalizedBrand,
NormalizationResult,
} from './types';
// Number of products handled per loop iteration by the batched writers in
// this module: upsertStoreProducts opens one transaction per batch, and
// createStoreProductSnapshots sends one multi-row INSERT per batch (22 bind
// parameters per row, so the parameter count scales with this value).
const BATCH_SIZE = 100;
// ============================================================
// PRODUCT UPSERTS
// ============================================================
/**
 * Counts returned by upsertStoreProducts.
 */
export interface UpsertProductsResult {
/** Total rows written, computed as `new + updated`. */
upserted: number;
/** Rows for which the upsert statement reported `is_new` as true. */
new: number;
/** Rows for which `is_new` was not true, i.e. counted as updates. */
updated: number;
}
/**
 * Upsert products into the store_products table.
 *
 * Products are processed in chunks of BATCH_SIZE. Each chunk runs in its own
 * transaction on a client checked out from the pool, so a failure rolls back
 * only the chunk in flight; chunks committed earlier stay committed.
 *
 * @param pool - pg pool used to check out one client per chunk.
 * @param products - Normalized products to write; each row is keyed by
 *   (dispensaryId, platform, externalProductId).
 * @param pricing - Pricing looked up by `externalProductId`. Price fields are
 *   divided by 100 before being stored (presumably cents -> dollars; confirm
 *   against the normalizer). A missing or falsy (0) value is stored as NULL.
 * @param availability - Availability looked up by `externalProductId`. When
 *   absent, the product is stored as in stock with status 'unknown'.
 * @param options - `dryRun`: log each chunk and write nothing.
 * @returns Counts of new vs updated rows. All zeros for empty input and for
 *   dry runs (nothing is counted when nothing is written).
 * @throws The error raised by the failing statement. If the follow-up ROLLBACK
 *   also fails, that secondary error is logged (not thrown) and the client is
 *   released with an error so the pool discards the connection.
 */
export async function upsertStoreProducts(
  pool: Pool,
  products: NormalizedProduct[],
  pricing: Map<string, NormalizedPricing>,
  availability: Map<string, NormalizedAvailability>,
  options: { dryRun?: boolean } = {}
): Promise<UpsertProductsResult> {
  if (products.length === 0) {
    return { upserted: 0, new: 0, updated: 0 };
  }
  const { dryRun = false } = options;

  // Stored price = source value / 100. Truthiness is intentional here to keep
  // the existing contract: undefined, null and 0 all become NULL.
  const toStoredPrice = (amount: number | null | undefined): number | null =>
    amount ? amount / 100 : null;

  let newCount = 0;
  let updatedCount = 0;

  // Process in batches
  for (let i = 0; i < products.length; i += BATCH_SIZE) {
    const batch = products.slice(i, i + BATCH_SIZE);
    if (dryRun) {
      console.log(`[DryRun] Would upsert ${batch.length} products`);
      continue;
    }

    const client = await pool.connect();
    // Set only when ROLLBACK itself fails; handed to release() below.
    let rollbackFailure: Error | undefined;
    try {
      await client.query('BEGIN');
      for (const product of batch) {
        const productPricing = pricing.get(product.externalProductId);
        const productAvailability = availability.get(product.externalProductId);
        // `RETURNING (xmax = 0)` is used to tell a fresh insert from a
        // conflict update (relies on PostgreSQL's xmax system column).
        const result = await client.query(
          `INSERT INTO store_products (
dispensary_id, provider, provider_product_id, provider_brand_id,
name, brand_name, category, subcategory,
price_rec, price_med, price_rec_special, price_med_special,
is_on_special, discount_percent,
is_in_stock, stock_status,
thc_percent, cbd_percent,
image_url,
first_seen_at, last_seen_at, updated_at
) VALUES (
$1, $2, $3, $4,
$5, $6, $7, $8,
$9, $10, $11, $12,
$13, $14,
$15, $16,
$17, $18,
$19,
NOW(), NOW(), NOW()
)
ON CONFLICT (dispensary_id, provider, provider_product_id)
DO UPDATE SET
name = EXCLUDED.name,
brand_name = EXCLUDED.brand_name,
category = EXCLUDED.category,
subcategory = EXCLUDED.subcategory,
price_rec = EXCLUDED.price_rec,
price_med = EXCLUDED.price_med,
price_rec_special = EXCLUDED.price_rec_special,
price_med_special = EXCLUDED.price_med_special,
is_on_special = EXCLUDED.is_on_special,
discount_percent = EXCLUDED.discount_percent,
is_in_stock = EXCLUDED.is_in_stock,
stock_status = EXCLUDED.stock_status,
thc_percent = EXCLUDED.thc_percent,
cbd_percent = EXCLUDED.cbd_percent,
image_url = EXCLUDED.image_url,
last_seen_at = NOW(),
updated_at = NOW()
RETURNING (xmax = 0) as is_new`,
          [
            product.dispensaryId,
            product.platform,
            product.externalProductId,
            product.brandId,
            product.name,
            product.brandName,
            product.category,
            product.subcategory,
            toStoredPrice(productPricing?.priceRec),
            toStoredPrice(productPricing?.priceMed),
            toStoredPrice(productPricing?.priceRecSpecial),
            toStoredPrice(productPricing?.priceMedSpecial),
            productPricing?.isOnSpecial || false,
            productPricing?.discountPercent,
            productAvailability?.inStock ?? true,
            productAvailability?.stockStatus || 'unknown',
            product.thcPercent,
            product.cbdPercent,
            product.primaryImageUrl,
          ]
        );
        if (result.rows[0]?.is_new) {
          newCount++;
        } else {
          updatedCount++;
        }
      }
      await client.query('COMMIT');
    } catch (error) {
      // A failing ROLLBACK (e.g. dropped connection) must not replace the
      // error that actually caused the batch to fail.
      try {
        await client.query('ROLLBACK');
      } catch (rollbackError) {
        rollbackFailure =
          rollbackError instanceof Error ? rollbackError : new Error(String(rollbackError));
        console.error(
          '[upsertStoreProducts] ROLLBACK failed; discarding connection:',
          rollbackFailure
        );
      }
      throw error;
    } finally {
      // Passing an error tells the pool to destroy the client instead of
      // reusing a connection that may still have an open transaction.
      client.release(rollbackFailure);
    }
  }

  return {
    upserted: newCount + updatedCount,
    new: newCount,
    updated: updatedCount,
  };
}
// ============================================================
// SNAPSHOT CREATION
// ============================================================
/**
 * Result returned by createStoreProductSnapshots.
 */
export interface CreateSnapshotsResult {
/**
 * Number of snapshot rows sent for insertion (sum of the batch sizes). In a
 * dry run this is the number of products that would have been written.
 */
created: number;
}
/**
 * Append one store_product_snapshots row per product in a crawl.
 *
 * Rows are written with one multi-row INSERT per chunk of BATCH_SIZE products,
 * issued directly on the pool (no explicit transaction), so chunks that were
 * already inserted remain if a later chunk fails.
 *
 * @param pool - pg pool the INSERT statements are sent to.
 * @param dispensaryId - Dispensary id written on every snapshot row.
 * @param products - Normalized products from the crawl.
 * @param pricing - Pricing looked up by `externalProductId`; price fields are
 *   divided by 100 and stored as NULL when missing or falsy.
 * @param availability - Availability looked up by `externalProductId`; when
 *   absent the row is written as in stock with status 'unknown'.
 * @param crawlRunId - Crawl run id stored on each row (may be null).
 * @param options - `dryRun`: log and report `products.length` without writing.
 * @returns The number of snapshot rows sent (or that would be sent in a dry run).
 */
export async function createStoreProductSnapshots(
  pool: Pool,
  dispensaryId: number,
  products: NormalizedProduct[],
  pricing: Map<string, NormalizedPricing>,
  availability: Map<string, NormalizedAvailability>,
  crawlRunId: number | null,
  options: { dryRun?: boolean } = {}
): Promise<CreateSnapshotsResult> {
  const total = products.length;
  if (total === 0) {
    return { created: 0 };
  }
  if (options.dryRun) {
    console.log(`[DryRun] Would create ${total} snapshots`);
    return { created: total };
  }

  // Number of columns bound per snapshot row; must match the column list below.
  const COLUMNS_PER_ROW = 22;

  const scaled = (amount: number | null | undefined): number | null =>
    amount ? amount / 100 : null;

  // Flattens one product into its bind values, in column order.
  const toRow = (product: NormalizedProduct): unknown[] => {
    const price = pricing.get(product.externalProductId);
    const stock = availability.get(product.externalProductId);
    return [
      dispensaryId,
      product.platform,
      product.externalProductId,
      crawlRunId,
      new Date(), // captured_at
      product.name,
      product.brandName,
      product.category,
      product.subcategory,
      scaled(price?.priceRec),
      scaled(price?.priceMed),
      scaled(price?.priceRecSpecial),
      scaled(price?.priceMedSpecial),
      price?.isOnSpecial || false,
      price?.discountPercent,
      stock?.inStock ?? true,
      stock?.quantity,
      stock?.stockStatus || 'unknown',
      product.thcPercent,
      product.cbdPercent,
      product.primaryImageUrl,
      JSON.stringify(product.rawProduct),
    ];
  };

  let created = 0;
  let start = 0;
  while (start < total) {
    const rows = products.slice(start, start + BATCH_SIZE).map(toRow);

    // One "($k, $k+1, ...)" tuple per row, numbered consecutively across rows.
    const tuples: string[] = [];
    rows.forEach((_row, rowIndex) => {
      const slots: string[] = [];
      for (let col = 1; col <= COLUMNS_PER_ROW; col++) {
        slots.push(`$${rowIndex * COLUMNS_PER_ROW + col}`);
      }
      tuples.push(`(${slots.join(', ')})`);
    });

    await pool.query(
      `INSERT INTO store_product_snapshots (
dispensary_id, provider, provider_product_id, crawl_run_id,
captured_at,
name, brand_name, category, subcategory,
price_rec, price_med, price_rec_special, price_med_special,
is_on_special, discount_percent,
is_in_stock, stock_quantity, stock_status,
thc_percent, cbd_percent,
image_url, raw_data
) VALUES ${tuples.join(', ')}`,
      rows.flat()
    );

    created += rows.length;
    start += BATCH_SIZE;
  }

  return { created };
}
// ============================================================
// DISCONTINUED PRODUCTS
// ============================================================
/**
 * Mark products as discontinued if they were not part of the current crawl.
 *
 * Reads every store_products row for the dispensary/platform that is currently
 * `is_in_stock = TRUE`, and treats any whose `provider_product_id` is absent
 * from `currentProductIds` as discontinued. Those rows are set to
 * `is_in_stock = FALSE`, `stock_status = 'discontinued'`, and one
 * 'discontinued' snapshot row per product is appended to
 * store_product_snapshots.
 *
 * The UPDATE and the snapshot INSERT are two separate statements on the pool
 * (no surrounding transaction).
 *
 * NOTE(review): an empty `currentProductIds` marks every in-stock product for
 * the dispensary/platform as discontinued; callers should make sure an empty
 * set really means "store has no products" rather than "crawl failed".
 *
 * @param pool - pg pool the statements are sent to.
 * @param dispensaryId - Dispensary whose products are checked.
 * @param currentProductIds - Provider product ids seen in the current crawl.
 * @param platform - Provider name matched against `store_products.provider`.
 * @param crawlRunId - Crawl run id stored on the snapshot rows (may be null).
 * @param options - `dryRun`: run the lookup, log, and write nothing.
 * @returns The number of products marked (or that would be marked) discontinued.
 */
export async function markDiscontinuedProducts(
  pool: Pool,
  dispensaryId: number,
  currentProductIds: Set<string>,
  platform: string,
  crawlRunId: number | null,
  options: { dryRun?: boolean } = {}
): Promise<number> {
  const { dryRun = false } = options;

  // Get all in-stock products for this dispensary/platform
  const result = await pool.query(
    `SELECT provider_product_id FROM store_products
WHERE dispensary_id = $1 AND provider = $2 AND is_in_stock = TRUE`,
    [dispensaryId, platform]
  );
  const discontinuedIds: string[] = result.rows
    .map((row: { provider_product_id: string }) => row.provider_product_id)
    .filter((id: string) => !currentProductIds.has(id));

  if (discontinuedIds.length === 0) {
    return 0;
  }
  if (dryRun) {
    console.log(`[DryRun] Would mark ${discontinuedIds.length} products as discontinued`);
    return discontinuedIds.length;
  }

  // Update store_products to mark as out of stock
  await pool.query(
    `UPDATE store_products
SET is_in_stock = FALSE,
stock_status = 'discontinued',
updated_at = NOW()
WHERE dispensary_id = $1
AND provider = $2
AND provider_product_id = ANY($3)`,
    [dispensaryId, platform, discontinuedIds]
  );

  // Create snapshots for all discontinued products in one set-based statement
  // (same `= ANY($3)` filter as the UPDATE above) instead of one round-trip
  // per product.
  await pool.query(
    `INSERT INTO store_product_snapshots (
dispensary_id, provider, provider_product_id, crawl_run_id,
captured_at, is_in_stock, stock_status
)
SELECT
dispensary_id, provider, provider_product_id, $4,
NOW(), FALSE, 'discontinued'
FROM store_products
WHERE dispensary_id = $1 AND provider = $2 AND provider_product_id = ANY($3)`,
    [dispensaryId, platform, discontinuedIds, crawlRunId]
  );

  return discontinuedIds.length;
}
// ============================================================
// BRAND UPSERTS
// ============================================================
/**
 * Counts returned by upsertBrands.
 */
export interface UpsertBrandsResult {
/**
 * Number of brands passed in (`brands.length`), whether or not a row was
 * actually inserted or changed for each of them.
 */
upserted: number;
/** Brands for which the statement returned `is_new` as true; always 0 in a dry run. */
new: number;
}
/**
 * Insert brands into the brands table, one statement per brand, keyed by slug.
 *
 * @param pool - pg pool the statements are sent to.
 * @param brands - Normalized brands to write.
 * @param options - `dryRun`: log and write nothing. `skipIfExists` (default
 *   true): on a slug conflict do nothing; when false, refresh `logo_url`
 *   (keeping the stored one if the incoming value is NULL) and `updated_at`.
 * @returns `upserted` is always `brands.length`; `new` counts the statements
 *   whose returned row had `is_new` true (0 in a dry run).
 */
export async function upsertBrands(
  pool: Pool,
  brands: NormalizedBrand[],
  options: { dryRun?: boolean; skipIfExists?: boolean } = {}
): Promise<UpsertBrandsResult> {
  const total = brands.length;
  if (total === 0) {
    return { upserted: 0, new: 0 };
  }

  if (options.dryRun) {
    console.log(`[DryRun] Would upsert ${total} brands`);
    return { upserted: total, new: 0 };
  }

  // Only an omitted flag falls back to the default of "skip existing".
  const skipIfExists = options.skipIfExists === undefined ? true : options.skipIfExists;
  const onConflict = skipIfExists
    ? 'NOTHING'
    : 'UPDATE SET logo_url = COALESCE(EXCLUDED.logo_url, brands.logo_url), updated_at = NOW()';
  // The statement text is the same for every brand, so build it once.
  const statement = `INSERT INTO brands (name, slug, external_id, logo_url, created_at, updated_at)
VALUES ($1, $2, $3, $4, NOW(), NOW())
ON CONFLICT (slug) DO ${onConflict}
RETURNING (xmax = 0) as is_new`;

  let inserted = 0;
  for (const { name, slug, externalBrandId, logoUrl } of brands) {
    const { rows } = await pool.query(statement, [name, slug, externalBrandId, logoUrl]);
    // With DO NOTHING a conflicting brand returns no row at all.
    inserted += rows[0]?.is_new ? 1 : 0;
  }

  return { upserted: total, new: inserted };
}
// ============================================================
// FULL HYDRATION
// ============================================================
/**
 * Summary returned by hydrateToCanonical, assembled from the results of the
 * individual write steps.
 */
export interface HydratePayloadResult {
/** `upserted` count from upsertStoreProducts. */
productsUpserted: number;
/** `new` count from upsertStoreProducts. */
productsNew: number;
/** `updated` count from upsertStoreProducts. */
productsUpdated: number;
/** Value returned by markDiscontinuedProducts. */
productsDiscontinued: number;
/** `created` count from createStoreProductSnapshots. */
snapshotsCreated: number;
/** `new` count from upsertBrands. */
brandsCreated: number;
}
/**
 * Write a complete normalization result into the canonical tables.
 *
 * Runs four steps strictly in sequence, each awaited before the next starts:
 * brands, store products, product snapshots, then discontinued-product
 * marking. The steps are independent calls on the pool; an error from any
 * step propagates and later steps do not run.
 *
 * The platform passed to the discontinued step is taken from the first
 * product and falls back to 'dutchie' when there are no products (the step
 * still runs in that case, with an empty id set).
 *
 * @param pool - pg pool forwarded to every step.
 * @param dispensaryId - Dispensary the snapshots and discontinued check apply to.
 * @param normResult - Normalized brands, products, pricing and availability.
 * @param crawlRunId - Crawl run id forwarded to the snapshot steps (may be null).
 * @param options - `dryRun` is forwarded to every step.
 * @returns Per-step counts collected into a single summary.
 */
export async function hydrateToCanonical(
  pool: Pool,
  dispensaryId: number,
  normResult: NormalizationResult,
  crawlRunId: number | null,
  options: { dryRun?: boolean } = {}
): Promise<HydratePayloadResult> {
  const { dryRun = false } = options;
  const { brands, products, pricing, availability } = normResult;

  // Step 1: brands
  const brandStats = await upsertBrands(pool, brands, { dryRun });

  // Step 2: current product state
  const productStats = await upsertStoreProducts(pool, products, pricing, availability, {
    dryRun,
  });

  // Step 3: historical snapshots
  const snapshotStats = await createStoreProductSnapshots(
    pool,
    dispensaryId,
    products,
    pricing,
    availability,
    crawlRunId,
    { dryRun }
  );

  // Step 4: anything in stock that this crawl did not see is discontinued
  const seenIds = new Set<string>();
  for (const product of products) {
    seenIds.add(product.externalProductId);
  }
  const firstProduct = products[0];
  const platform = firstProduct?.platform || 'dutchie';
  const productsDiscontinued = await markDiscontinuedProducts(
    pool,
    dispensaryId,
    seenIds,
    platform,
    crawlRunId,
    { dryRun }
  );

  return {
    productsUpserted: productStats.upserted,
    productsNew: productStats.new,
    productsUpdated: productStats.updated,
    productsDiscontinued,
    snapshotsCreated: snapshotStats.created,
    brandsCreated: brandStats.new,
  };
}