fix: Public API column mappings and OOS detection

- Fix store_products column references (name_raw, brand_name_raw, category_raw)
- Fix v_product_snapshots column references (crawled_at, *_cents pricing)
- Fix dispensaries column references (zipcode, logo_image, remove hours/amenities)
- Add services and license_type to dispensary API response
- Add consecutive_misses OOS tracking to product-resync handler
- Add migration 075 for consecutive_misses column
- Add CRAWL_PIPELINE.md documentation

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Kelly
2025-12-09 20:44:53 -07:00
parent 9c6dd37316
commit 0295637ed6
4 changed files with 727 additions and 68 deletions

View File

@@ -430,14 +430,14 @@ router.get('/products', async (req: PublicApiRequest, res: Response) => {
// Filter by category
if (category) {
whereClause += ` AND LOWER(p.category) = LOWER($${paramIndex})`;
whereClause += ` AND LOWER(p.category_raw) = LOWER($${paramIndex})`;
params.push(category);
paramIndex++;
}
// Filter by brand
if (brand) {
whereClause += ` AND LOWER(p.brand_name) LIKE LOWER($${paramIndex})`;
whereClause += ` AND LOWER(p.brand_name_raw) LIKE LOWER($${paramIndex})`;
params.push(`%${brand}%`);
paramIndex++;
}
@@ -468,7 +468,7 @@ router.get('/products', async (req: PublicApiRequest, res: Response) => {
// Search by name or brand
if (search) {
whereClause += ` AND (LOWER(p.name) LIKE LOWER($${paramIndex}) OR LOWER(p.brand_name) LIKE LOWER($${paramIndex}))`;
whereClause += ` AND (LOWER(p.name_raw) LIKE LOWER($${paramIndex}) OR LOWER(p.brand_name_raw) LIKE LOWER($${paramIndex}))`;
params.push(`%${search}%`);
paramIndex++;
}
@@ -479,10 +479,11 @@ router.get('/products', async (req: PublicApiRequest, res: Response) => {
// Build ORDER BY clause (use pricing_type for price sorting)
const sortDirection = sort_dir === 'desc' ? 'DESC' : 'ASC';
let orderBy = 'p.name ASC';
let orderBy = 'p.name_raw ASC';
switch (sort_by) {
case 'price':
const sortPriceCol = pricing_type === 'med' ? 's.price_med' : 's.price_rec';
// View uses *_cents columns, but we SELECT as price_rec/price_med
const sortPriceCol = pricing_type === 'med' ? 's.med_min_price_cents' : 's.rec_min_price_cents';
orderBy = `${sortPriceCol} ${sortDirection} NULLS LAST`;
break;
case 'thc':
@@ -493,13 +494,14 @@ router.get('/products', async (req: PublicApiRequest, res: Response) => {
break;
case 'name':
default:
orderBy = `p.name ${sortDirection}`;
orderBy = `p.name_raw ${sortDirection}`;
}
params.push(limitNum, offsetNum);
// Determine which price column to use for filtering based on pricing_type
const priceColumn = pricing_type === 'med' ? 's.price_med' : 's.price_rec';
// View uses *_cents columns, divide by 100 for dollar comparison
const priceColumn = pricing_type === 'med' ? 's.med_min_price_cents / 100.0' : 's.rec_min_price_cents / 100.0';
// Query products with latest snapshot data
// Uses store_products + v_product_snapshots (canonical tables with raw_data)
@@ -508,10 +510,10 @@ router.get('/products', async (req: PublicApiRequest, res: Response) => {
p.id,
p.dispensary_id,
p.provider_product_id as dutchie_id,
p.name,
p.brand_name as brand,
p.category,
p.subcategory,
p.name_raw as name,
p.brand_name_raw as brand,
p.category_raw as category,
p.subcategory_raw as subcategory,
p.strain_type,
p.stock_status,
p.thc_percent as thc,
@@ -519,19 +521,19 @@ router.get('/products', async (req: PublicApiRequest, res: Response) => {
p.image_url,
p.created_at,
p.updated_at,
s.price_rec,
s.price_med,
s.price_rec_special,
s.price_med_special,
s.rec_min_price_cents / 100.0 as price_rec,
s.med_min_price_cents / 100.0 as price_med,
s.rec_min_special_price_cents / 100.0 as price_rec_special,
s.med_min_special_price_cents / 100.0 as price_med_special,
s.stock_quantity as total_quantity_available,
s.is_on_special as special,
s.captured_at as snapshot_at,
s.special,
s.crawled_at as snapshot_at,
${include_variants === 'true' || include_variants === '1' ? "s.raw_data->'POSMetaData'->'children' as variants_raw" : 'NULL as variants_raw'}
FROM store_products p
LEFT JOIN LATERAL (
SELECT * FROM v_product_snapshots
WHERE store_product_id = p.id
ORDER BY captured_at DESC
ORDER BY crawled_at DESC
LIMIT 1
) s ON true
${whereClause}
@@ -545,9 +547,9 @@ router.get('/products', async (req: PublicApiRequest, res: Response) => {
const { rows: countRows } = await pool.query(`
SELECT COUNT(*) as total FROM store_products p
LEFT JOIN LATERAL (
SELECT price_rec, price_med, is_on_special FROM v_product_snapshots
SELECT rec_min_price_cents / 100.0 as price_rec, med_min_price_cents / 100.0 as price_med, special as is_on_special FROM v_product_snapshots
WHERE store_product_id = p.id
ORDER BY captured_at DESC
ORDER BY crawled_at DESC
LIMIT 1
) s ON true
${whereClause}
@@ -1002,22 +1004,27 @@ router.get('/dispensaries', async (req: PublicApiRequest, res: Response) => {
SELECT
d.id,
d.name,
d.address,
d.address1,
d.address2,
d.city,
d.state,
d.zip,
d.zipcode as zip,
d.phone,
d.email,
d.website,
d.latitude,
d.longitude,
d.menu_type as platform,
d.menu_url,
d.hours,
d.amenities,
d.description,
d.image_url,
d.logo_image as image_url,
d.google_rating,
d.google_review_count,
d.offer_pickup,
d.offer_delivery,
d.offer_curbside_pickup,
d.is_medical,
d.is_recreational,
COALESCE(pc.product_count, 0) as product_count,
COALESCE(pc.in_stock_count, 0) as in_stock_count,
pc.last_updated
@@ -1051,11 +1058,13 @@ router.get('/dispensaries', async (req: PublicApiRequest, res: Response) => {
dispensaries: [{
id: d.id,
name: d.name,
address: d.address,
address1: d.address1,
address2: d.address2,
city: d.city,
state: d.state,
zip: d.zip,
phone: d.phone,
email: d.email,
website: d.website,
menu_url: d.menu_url,
location: d.latitude && d.longitude ? {
@@ -1063,10 +1072,17 @@ router.get('/dispensaries', async (req: PublicApiRequest, res: Response) => {
longitude: parseFloat(d.longitude)
} : null,
platform: d.platform,
hours: d.hours || null,
amenities: d.amenities || [],
description: d.description || null,
image_url: d.image_url || null,
services: {
pickup: d.offer_pickup || false,
delivery: d.offer_delivery || false,
curbside: d.offer_curbside_pickup || false
},
license_type: {
medical: d.is_medical || false,
recreational: d.is_recreational || false
},
rating: d.google_rating ? parseFloat(d.google_rating) : null,
review_count: d.google_review_count ? parseInt(d.google_review_count, 10) : null,
product_count: parseInt(d.product_count || '0', 10),
@@ -1109,22 +1125,27 @@ router.get('/dispensaries', async (req: PublicApiRequest, res: Response) => {
SELECT
d.id,
d.name,
d.address,
d.address1,
d.address2,
d.city,
d.state,
d.zip,
d.zipcode as zip,
d.phone,
d.email,
d.website,
d.latitude,
d.longitude,
d.menu_type as platform,
d.menu_url,
d.hours,
d.amenities,
d.description,
d.image_url,
d.logo_image as image_url,
d.google_rating,
d.google_review_count,
d.offer_pickup,
d.offer_delivery,
d.offer_curbside_pickup,
d.is_medical,
d.is_recreational,
COALESCE(pc.product_count, 0) as product_count,
COALESCE(pc.in_stock_count, 0) as in_stock_count,
pc.last_updated
@@ -1158,11 +1179,13 @@ router.get('/dispensaries', async (req: PublicApiRequest, res: Response) => {
const transformedDispensaries = dispensaries.map((d) => ({
id: d.id,
name: d.name,
address: d.address,
address1: d.address1,
address2: d.address2,
city: d.city,
state: d.state,
zip: d.zip,
phone: d.phone,
email: d.email,
website: d.website,
menu_url: d.menu_url,
location: d.latitude && d.longitude ? {
@@ -1170,10 +1193,17 @@ router.get('/dispensaries', async (req: PublicApiRequest, res: Response) => {
longitude: parseFloat(d.longitude)
} : null,
platform: d.platform,
hours: d.hours || null,
amenities: d.amenities || [],
description: d.description || null,
image_url: d.image_url || null,
services: {
pickup: d.offer_pickup || false,
delivery: d.offer_delivery || false,
curbside: d.offer_curbside_pickup || false
},
license_type: {
medical: d.is_medical || false,
recreational: d.is_recreational || false
},
rating: d.google_rating ? parseFloat(d.google_rating) : null,
review_count: d.google_review_count ? parseInt(d.google_review_count, 10) : null,
product_count: parseInt(d.product_count || '0', 10),
@@ -1415,8 +1445,8 @@ router.get('/stores/:id/metrics', async (req: PublicApiRequest, res: Response) =
COUNT(*) as total_products,
COUNT(*) FILTER (WHERE stock_status = 'in_stock') as in_stock,
COUNT(*) FILTER (WHERE stock_status = 'out_of_stock') as out_of_stock,
COUNT(DISTINCT brand_name) FILTER (WHERE brand_name IS NOT NULL) as unique_brands,
COUNT(DISTINCT category) FILTER (WHERE category IS NOT NULL) as unique_categories
COUNT(DISTINCT brand_name_raw) FILTER (WHERE brand_name_raw IS NOT NULL) as unique_brands,
COUNT(DISTINCT category_raw) FILTER (WHERE category_raw IS NOT NULL) as unique_categories
FROM store_products
WHERE dispensary_id = $1
`, [storeId]);
@@ -1441,12 +1471,12 @@ router.get('/stores/:id/metrics', async (req: PublicApiRequest, res: Response) =
// Get category breakdown
const { rows: categoryBreakdown } = await pool.query(`
SELECT
COALESCE(category, 'Uncategorized') as category,
COALESCE(category_raw, 'Uncategorized') as category,
COUNT(*) as count,
COUNT(*) FILTER (WHERE stock_status = 'in_stock') as in_stock
FROM store_products
WHERE dispensary_id = $1
GROUP BY category
GROUP BY category_raw
ORDER BY count DESC
LIMIT 10
`, [storeId]);
@@ -1584,9 +1614,9 @@ router.get('/stores/:id/product-metrics', async (req: PublicApiRequest, res: Res
)
SELECT
sp.id,
sp.name,
sp.brand_name,
sp.category,
sp.name_raw as name,
sp.brand_name_raw as brand_name,
sp.category_raw as category,
sp.stock_status,
ls.current_price,
ls.current_special_price,
@@ -1606,7 +1636,7 @@ router.get('/stores/:id/product-metrics', async (req: PublicApiRequest, res: Res
${whereClause}
ORDER BY
${sort_by === 'price' ? 'ls.current_price DESC NULLS LAST' :
sort_by === 'stock_status' ? "CASE sp.stock_status WHEN 'out_of_stock' THEN 0 ELSE 1 END, sp.name" :
sort_by === 'stock_status' ? "CASE sp.stock_status WHEN 'out_of_stock' THEN 0 ELSE 1 END, sp.name_raw" :
'ABS(COALESCE(price_change_percent, 0)) DESC'}
LIMIT $${paramIndex}
`, params);
@@ -1719,7 +1749,7 @@ router.get('/stores/:id/competitor-snapshot', async (req: PublicApiRequest, res:
// Get this store's average prices by category
const { rows: storePrices } = await pool.query(`
SELECT
sp.category,
sp.category_raw as category,
ROUND(AVG(sps.price_rec)::numeric, 2) as avg_price,
COUNT(*) as product_count
FROM store_products sp
@@ -1729,8 +1759,8 @@ router.get('/stores/:id/competitor-snapshot', async (req: PublicApiRequest, res:
WHERE dispensary_id = $1
ORDER BY store_product_id, captured_at DESC
) sps ON sp.id = sps.store_product_id
WHERE sp.dispensary_id = $1 AND sp.category IS NOT NULL AND sps.price_rec > 0
GROUP BY sp.category
WHERE sp.dispensary_id = $1 AND sp.category_raw IS NOT NULL AND sps.price_rec > 0
GROUP BY sp.category_raw
`, [storeId]);
// Get market average prices by category (all competitors)
@@ -1740,7 +1770,7 @@ router.get('/stores/:id/competitor-snapshot', async (req: PublicApiRequest, res:
if (competitorIds.length > 0) {
const { rows } = await pool.query(`
SELECT
sp.category,
sp.category_raw as category,
ROUND(AVG(sps.price_rec)::numeric, 2) as market_avg_price,
COUNT(DISTINCT sp.dispensary_id) as store_count
FROM store_products sp
@@ -1750,17 +1780,17 @@ router.get('/stores/:id/competitor-snapshot', async (req: PublicApiRequest, res:
WHERE dispensary_id = ANY($1)
ORDER BY store_product_id, captured_at DESC
) sps ON sp.id = sps.store_product_id
WHERE sp.dispensary_id = ANY($1) AND sp.category IS NOT NULL AND sps.price_rec > 0
GROUP BY sp.category
WHERE sp.dispensary_id = ANY($1) AND sp.category_raw IS NOT NULL AND sps.price_rec > 0
GROUP BY sp.category_raw
`, [competitorIds]);
marketPrices = rows;
}
// Get this store's brands
const { rows: storeBrands } = await pool.query(`
SELECT DISTINCT brand_name
SELECT DISTINCT brand_name_raw as brand_name
FROM store_products
WHERE dispensary_id = $1 AND brand_name IS NOT NULL
WHERE dispensary_id = $1 AND brand_name_raw IS NOT NULL
`, [storeId]);
const storeBrandSet = new Set(storeBrands.map(b => b.brand_name.toLowerCase()));
@@ -1772,13 +1802,13 @@ router.get('/stores/:id/competitor-snapshot', async (req: PublicApiRequest, res:
SELECT
d.id as competitor_id,
d.name as competitor_name,
COUNT(DISTINCT sp.brand_name) as total_brands,
COUNT(DISTINCT sp.brand_name) FILTER (
WHERE LOWER(sp.brand_name) = ANY($2)
COUNT(DISTINCT sp.brand_name_raw) as total_brands,
COUNT(DISTINCT sp.brand_name_raw) FILTER (
WHERE LOWER(sp.brand_name_raw) = ANY($2)
) as shared_brands
FROM dispensaries d
INNER JOIN store_products sp ON sp.dispensary_id = d.id
WHERE d.id = ANY($1) AND sp.brand_name IS NOT NULL
WHERE d.id = ANY($1) AND sp.brand_name_raw IS NOT NULL
GROUP BY d.id, d.name
`, [competitorIds, Array.from(storeBrandSet)]);
brandOverlap = rows;
@@ -1835,6 +1865,39 @@ router.get('/stores/:id/competitor-snapshot', async (req: PublicApiRequest, res:
}
});
/**
 * GET /api/v1/stats
 * Get aggregate stats for consumer sites (product count, brand count, dispensary count)
 *
 * Success response: `{ success: true, stats: { products, brands, dispensaries } }`,
 * where each value is parsed to an integer and falls back to 0 when the
 * corresponding column is missing or empty.
 *
 * Failure response: HTTP 500 with `{ error: 'Failed to fetch stats', message }`.
 */
router.get('/stats', async (req: PublicApiRequest, res: Response) => {
  try {
    // Get aggregate stats across all data in a single round trip
    // (three independent scalar subqueries).
    // NOTE(review): the dispensary count filters on `dispensaries.product_count`;
    // the /dispensaries listing in this file derives product_count from a joined
    // aggregate instead — confirm the column actually exists on the table.
    const { rows: stats } = await pool.query(`
      SELECT
        (SELECT COUNT(*) FROM store_products) as product_count,
        (SELECT COUNT(DISTINCT brand_name_raw) FROM store_products WHERE brand_name_raw IS NOT NULL) as brand_count,
        (SELECT COUNT(*) FROM dispensaries WHERE crawl_enabled = true AND product_count > 0) as dispensary_count
    `);

    // Fall back to an empty row so the parseInt defaults below still apply.
    const s = stats[0] ?? {};

    res.json({
      success: true,
      stats: {
        // Values go through parseInt with a '0' default so the response
        // always carries plain integers.
        products: parseInt(s.product_count || '0', 10),
        brands: parseInt(s.brand_count || '0', 10),
        dispensaries: parseInt(s.dispensary_count || '0', 10)
      }
    });
  } catch (error: unknown) {
    console.error('Public API stats error:', error);

    // A thrown value is not guaranteed to be an Error; narrow before reading
    // `.message` so the response always carries a string.
    // NOTE(review): this forwards the raw error text to public API clients —
    // confirm that is intended.
    const message = error instanceof Error ? error.message : String(error);

    res.status(500).json({
      error: 'Failed to fetch stats',
      message
    });
  }
});
/**
* GET /api/v1/menu
* Get complete menu summary for the authenticated dispensary