Files
cannaiq/backend/src/services/analytics/StateAnalyticsService.ts
Kelly 9aefb554bc fix: Correct Analytics V2 SQL queries for schema alignment
- Fix JOIN path: store_products -> dispensaries -> states (was incorrectly joining sp.state_id which doesn't exist)
- Fix column names to use *_raw suffixes (category_raw, brand_name_raw, name_raw)
- Fix row mappings to read correct column names from query results
- Add ::timestamp casts for interval arithmetic in StoreAnalyticsService

All Analytics V2 endpoints now work correctly:
- /state/legal-breakdown
- /state/recreational
- /category/all
- /category/rec-vs-med
- /state/:code/summary
- /store/:id/summary

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-10 18:52:57 -07:00

539 lines
18 KiB
TypeScript

/**
* StateAnalyticsService
*
* Analytics for state-level market data and comparisons.
*
* Data Sources:
* - states: Legal status, year of legalization
* - dispensaries: Store counts by state
* - store_products: Product/brand coverage by state
* - store_product_snapshots: Historical data depth
*
* Key Metrics:
* - Legal state breakdown (rec, med-only, illegal)
* - Coverage by state (dispensaries, products, brands)
* - Rec vs Med price comparisons
* - Data freshness per state
*/
import { Pool } from 'pg';
import {
StateMarketSummary,
LegalStateBreakdown,
RecVsMedPriceComparison,
LegalType,
getLegalTypeFilter,
} from './types';
export class StateAnalyticsService {
constructor(private pool: Pool) {}
// ============================================================
// HELPER METHODS FOR LEGAL TYPE FILTERING
// ============================================================
/**
* Get recreational-only state codes
*/
async getRecreationalStates(): Promise<string[]> {
const result = await this.pool.query(`
SELECT code FROM states WHERE recreational_legal = TRUE ORDER BY code
`);
return result.rows.map((r: any) => r.code);
}
/**
* Get medical-only state codes (not recreational)
*/
async getMedicalOnlyStates(): Promise<string[]> {
const result = await this.pool.query(`
SELECT code FROM states
WHERE medical_legal = TRUE
AND (recreational_legal = FALSE OR recreational_legal IS NULL)
ORDER BY code
`);
return result.rows.map((r: any) => r.code);
}
/**
* Get no-program state codes
*/
async getNoProgramStates(): Promise<string[]> {
const result = await this.pool.query(`
SELECT code FROM states
WHERE (recreational_legal = FALSE OR recreational_legal IS NULL)
AND (medical_legal = FALSE OR medical_legal IS NULL)
ORDER BY code
`);
return result.rows.map((r: any) => r.code);
}
/**
* Get state IDs by legal type for use in subqueries
*/
async getStateIdsByLegalType(legalType: LegalType): Promise<number[]> {
const filter = getLegalTypeFilter(legalType);
const result = await this.pool.query(`
SELECT s.id FROM states s WHERE ${filter} ORDER BY s.id
`);
return result.rows.map((r: any) => r.id);
}
/**
* Get market summary for a specific state
*/
async getStateMarketSummary(stateCode: string): Promise<StateMarketSummary | null> {
// Get state info
const stateResult = await this.pool.query(`
SELECT
s.id,
s.code,
s.name,
s.recreational_legal,
s.rec_year,
s.medical_legal,
s.med_year
FROM states s
WHERE s.code = $1
`, [stateCode]);
if (stateResult.rows.length === 0) {
return null;
}
const state = stateResult.rows[0];
// Get coverage metrics
const coverageResult = await this.pool.query(`
SELECT
COUNT(DISTINCT d.id) AS dispensary_count,
COUNT(DISTINCT sp.id) AS product_count,
COUNT(DISTINCT sp.brand_name_raw) FILTER (WHERE sp.brand_name_raw IS NOT NULL) AS brand_count,
COUNT(DISTINCT sp.category_raw) FILTER (WHERE sp.category_raw IS NOT NULL) AS category_count,
COUNT(sps.id) AS snapshot_count,
MAX(sps.captured_at) AS last_crawl_at
FROM states s
LEFT JOIN dispensaries d ON d.state_id = s.id
LEFT JOIN store_products sp ON sp.dispensary_id = d.id AND sp.is_in_stock = TRUE
LEFT JOIN store_product_snapshots sps ON sps.dispensary_id = d.id
WHERE s.code = $1
`, [stateCode]);
const coverage = coverageResult.rows[0];
// Get pricing metrics
const pricingResult = await this.pool.query(`
SELECT
AVG(price_rec) AS avg_price,
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY price_rec) AS median_price,
MIN(price_rec) AS min_price,
MAX(price_rec) AS max_price
FROM store_products sp
JOIN dispensaries d ON d.id = sp.dispensary_id
JOIN states s ON s.id = d.state_id
WHERE s.code = $1
AND sp.price_rec IS NOT NULL
AND sp.is_in_stock = TRUE
`, [stateCode]);
const pricing = pricingResult.rows[0];
// Get top categories
const topCategoriesResult = await this.pool.query(`
SELECT
sp.category_raw,
COUNT(*) AS count
FROM store_products sp
JOIN dispensaries d ON d.id = sp.dispensary_id
JOIN states s ON s.id = d.state_id
WHERE s.code = $1
AND sp.category_raw IS NOT NULL
AND sp.is_in_stock = TRUE
GROUP BY sp.category_raw
ORDER BY count DESC
LIMIT 10
`, [stateCode]);
// Get top brands
const topBrandsResult = await this.pool.query(`
SELECT
sp.brand_name_raw AS brand,
COUNT(*) AS count
FROM store_products sp
JOIN dispensaries d ON d.id = sp.dispensary_id
JOIN states s ON s.id = d.state_id
WHERE s.code = $1
AND sp.brand_name_raw IS NOT NULL
AND sp.is_in_stock = TRUE
GROUP BY sp.brand_name_raw
ORDER BY count DESC
LIMIT 10
`, [stateCode]);
return {
state_code: state.code,
state_name: state.name,
legal_status: {
recreational_legal: state.recreational_legal || false,
rec_year: state.rec_year,
medical_legal: state.medical_legal || false,
med_year: state.med_year,
},
coverage: {
dispensary_count: parseInt(coverage.dispensary_count) || 0,
product_count: parseInt(coverage.product_count) || 0,
brand_count: parseInt(coverage.brand_count) || 0,
category_count: parseInt(coverage.category_count) || 0,
snapshot_count: parseInt(coverage.snapshot_count) || 0,
last_crawl_at: coverage.last_crawl_at ? coverage.last_crawl_at.toISOString() : null,
},
pricing: {
avg_price: pricing.avg_price ? parseFloat(pricing.avg_price) : null,
median_price: pricing.median_price ? parseFloat(pricing.median_price) : null,
min_price: pricing.min_price ? parseFloat(pricing.min_price) : null,
max_price: pricing.max_price ? parseFloat(pricing.max_price) : null,
},
top_categories: topCategoriesResult.rows.map((row: any) => ({
category: row.category_raw,
count: parseInt(row.count),
})),
top_brands: topBrandsResult.rows.map((row: any) => ({
brand: row.brand,
count: parseInt(row.count),
})),
};
}
/**
* Get breakdown by legal status (rec, med-only, no program)
*/
async getLegalStateBreakdown(): Promise<LegalStateBreakdown> {
// Get recreational states
const recResult = await this.pool.query(`
SELECT
s.code,
s.name,
COUNT(DISTINCT d.id) AS dispensary_count,
COUNT(DISTINCT sp.id) AS product_count,
COUNT(sps.id) AS snapshot_count
FROM states s
LEFT JOIN dispensaries d ON d.state_id = s.id
LEFT JOIN store_products sp ON sp.dispensary_id = d.id AND sp.is_in_stock = TRUE
LEFT JOIN store_product_snapshots sps ON sps.dispensary_id = d.id
WHERE s.recreational_legal = TRUE
GROUP BY s.code, s.name
ORDER BY dispensary_count DESC
`);
// Get medical-only states
const medResult = await this.pool.query(`
SELECT
s.code,
s.name,
COUNT(DISTINCT d.id) AS dispensary_count,
COUNT(DISTINCT sp.id) AS product_count,
COUNT(sps.id) AS snapshot_count
FROM states s
LEFT JOIN dispensaries d ON d.state_id = s.id
LEFT JOIN store_products sp ON sp.dispensary_id = d.id AND sp.is_in_stock = TRUE
LEFT JOIN store_product_snapshots sps ON sps.dispensary_id = d.id
WHERE s.medical_legal = TRUE
AND (s.recreational_legal = FALSE OR s.recreational_legal IS NULL)
GROUP BY s.code, s.name
ORDER BY dispensary_count DESC
`);
// Get no-program states
const noProgResult = await this.pool.query(`
SELECT s.code, s.name
FROM states s
WHERE (s.recreational_legal = FALSE OR s.recreational_legal IS NULL)
AND (s.medical_legal = FALSE OR s.medical_legal IS NULL)
ORDER BY s.name
`);
const recStates = recResult.rows;
const medStates = medResult.rows;
const noProgStates = noProgResult.rows;
return {
recreational_states: {
count: recStates.length,
dispensary_count: recStates.reduce((sum, s) => sum + parseInt(s.dispensary_count), 0),
product_count: recStates.reduce((sum, s) => sum + parseInt(s.product_count), 0),
snapshot_count: recStates.reduce((sum, s) => sum + parseInt(s.snapshot_count), 0),
states: recStates.map((row: any) => ({
code: row.code,
name: row.name,
dispensary_count: parseInt(row.dispensary_count),
})),
},
medical_only_states: {
count: medStates.length,
dispensary_count: medStates.reduce((sum, s) => sum + parseInt(s.dispensary_count), 0),
product_count: medStates.reduce((sum, s) => sum + parseInt(s.product_count), 0),
snapshot_count: medStates.reduce((sum, s) => sum + parseInt(s.snapshot_count), 0),
states: medStates.map((row: any) => ({
code: row.code,
name: row.name,
dispensary_count: parseInt(row.dispensary_count),
})),
},
no_program_states: {
count: noProgStates.length,
states: noProgStates.map((row: any) => ({
code: row.code,
name: row.name,
})),
},
};
}
/**
* Get rec vs med price comparison (overall or by category)
*/
async getRecVsMedPriceComparison(category?: string): Promise<RecVsMedPriceComparison[]> {
const params: any[] = [];
let categoryFilter = '';
let groupBy = 'NULL';
if (category) {
categoryFilter = 'AND sp.category_raw = $1';
params.push(category);
groupBy = 'sp.category_raw';
} else {
groupBy = 'sp.category_raw';
}
const result = await this.pool.query(`
WITH rec_prices AS (
SELECT
${category ? 'sp.category_raw' : 'sp.category_raw'},
COUNT(DISTINCT s.code) AS state_count,
COUNT(*) AS product_count,
AVG(sp.price_rec) AS avg_price,
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sp.price_rec) AS median_price
FROM store_products sp
JOIN dispensaries d ON d.id = sp.dispensary_id
JOIN states s ON s.id = d.state_id
WHERE s.recreational_legal = TRUE
AND sp.price_rec IS NOT NULL
AND sp.is_in_stock = TRUE
AND sp.category_raw IS NOT NULL
${categoryFilter}
GROUP BY sp.category_raw
),
med_prices AS (
SELECT
${category ? 'sp.category_raw' : 'sp.category_raw'},
COUNT(DISTINCT s.code) AS state_count,
COUNT(*) AS product_count,
AVG(sp.price_rec) AS avg_price,
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sp.price_rec) AS median_price
FROM store_products sp
JOIN dispensaries d ON d.id = sp.dispensary_id
JOIN states s ON s.id = d.state_id
WHERE s.medical_legal = TRUE
AND (s.recreational_legal = FALSE OR s.recreational_legal IS NULL)
AND sp.price_rec IS NOT NULL
AND sp.is_in_stock = TRUE
AND sp.category_raw IS NOT NULL
${categoryFilter}
GROUP BY sp.category_raw
)
SELECT
COALESCE(r.category, m.category) AS category,
r.state_count AS rec_state_count,
r.product_count AS rec_product_count,
r.avg_price AS rec_avg_price,
r.median_price AS rec_median_price,
m.state_count AS med_state_count,
m.product_count AS med_product_count,
m.avg_price AS med_avg_price,
m.median_price AS med_median_price,
CASE
WHEN r.avg_price IS NOT NULL AND m.avg_price IS NOT NULL THEN
ROUND(((r.avg_price - m.avg_price) / NULLIF(m.avg_price, 0) * 100)::NUMERIC, 2)
ELSE NULL
END AS price_diff_percent
FROM rec_prices r
FULL OUTER JOIN med_prices m ON r.category = m.category
ORDER BY COALESCE(r.product_count, 0) + COALESCE(m.product_count, 0) DESC
`, params);
return result.rows.map((row: any) => ({
category: row.category_raw,
recreational: {
state_count: parseInt(row.rec_state_count) || 0,
product_count: parseInt(row.rec_product_count) || 0,
avg_price: row.rec_avg_price ? parseFloat(row.rec_avg_price) : null,
median_price: row.rec_median_price ? parseFloat(row.rec_median_price) : null,
},
medical_only: {
state_count: parseInt(row.med_state_count) || 0,
product_count: parseInt(row.med_product_count) || 0,
avg_price: row.med_avg_price ? parseFloat(row.med_avg_price) : null,
median_price: row.med_median_price ? parseFloat(row.med_median_price) : null,
},
price_diff_percent: row.price_diff_percent ? parseFloat(row.price_diff_percent) : null,
}));
}
/**
* Get all states with coverage metrics
*/
async getAllStatesWithCoverage(): Promise<Array<{
state_code: string;
state_name: string;
recreational_legal: boolean;
medical_legal: boolean;
dispensary_count: number;
product_count: number;
brand_count: number;
last_crawl_at: string | null;
}>> {
const result = await this.pool.query(`
SELECT
s.code AS state_code,
s.name AS state_name,
COALESCE(s.recreational_legal, FALSE) AS recreational_legal,
COALESCE(s.medical_legal, FALSE) AS medical_legal,
COUNT(DISTINCT d.id) AS dispensary_count,
COUNT(DISTINCT sp.id) AS product_count,
COUNT(DISTINCT sp.brand_name_raw) FILTER (WHERE sp.brand_name_raw IS NOT NULL) AS brand_count,
MAX(sps.captured_at) AS last_crawl_at
FROM states s
LEFT JOIN dispensaries d ON d.state_id = s.id
LEFT JOIN store_products sp ON sp.dispensary_id = d.id AND sp.is_in_stock = TRUE
LEFT JOIN store_product_snapshots sps ON sps.dispensary_id = d.id
GROUP BY s.code, s.name, s.recreational_legal, s.medical_legal
ORDER BY dispensary_count DESC, s.name
`);
return result.rows.map((row: any) => ({
state_code: row.state_code,
state_name: row.state_name,
recreational_legal: row.recreational_legal,
medical_legal: row.medical_legal,
dispensary_count: parseInt(row.dispensary_count) || 0,
product_count: parseInt(row.product_count) || 0,
brand_count: parseInt(row.brand_count) || 0,
last_crawl_at: row.last_crawl_at ? row.last_crawl_at.toISOString() : null,
}));
}
/**
* Get state coverage gaps (legal states with low/no coverage)
*/
async getStateCoverageGaps(): Promise<Array<{
state_code: string;
state_name: string;
legal_type: 'recreational' | 'medical_only';
dispensary_count: number;
has_gap: boolean;
gap_reason: string;
}>> {
const result = await this.pool.query(`
SELECT
s.code AS state_code,
s.name AS state_name,
CASE
WHEN s.recreational_legal = TRUE THEN 'recreational'
ELSE 'medical_only'
END AS legal_type,
COUNT(DISTINCT d.id) AS dispensary_count,
CASE
WHEN COUNT(DISTINCT d.id) = 0 THEN TRUE
WHEN COUNT(DISTINCT sp.id) = 0 THEN TRUE
WHEN MAX(sps.captured_at) < NOW() - INTERVAL '7 days' THEN TRUE
ELSE FALSE
END AS has_gap,
CASE
WHEN COUNT(DISTINCT d.id) = 0 THEN 'No dispensaries'
WHEN COUNT(DISTINCT sp.id) = 0 THEN 'No products'
WHEN MAX(sps.captured_at) < NOW() - INTERVAL '7 days' THEN 'Stale data (>7 days)'
ELSE 'Good coverage'
END AS gap_reason
FROM states s
LEFT JOIN dispensaries d ON d.state_id = s.id
LEFT JOIN store_products sp ON sp.dispensary_id = d.id AND sp.is_in_stock = TRUE
LEFT JOIN store_product_snapshots sps ON sps.dispensary_id = d.id
WHERE s.recreational_legal = TRUE OR s.medical_legal = TRUE
GROUP BY s.code, s.name, s.recreational_legal, s.medical_legal
HAVING COUNT(DISTINCT d.id) = 0
OR COUNT(DISTINCT sp.id) = 0
OR MAX(sps.captured_at) IS NULL
OR MAX(sps.captured_at) < NOW() - INTERVAL '7 days'
ORDER BY
CASE WHEN s.recreational_legal = TRUE THEN 0 ELSE 1 END,
dispensary_count DESC
`);
return result.rows.map((row: any) => ({
state_code: row.state_code,
state_name: row.state_name,
legal_type: row.legal_type,
dispensary_count: parseInt(row.dispensary_count) || 0,
has_gap: row.has_gap,
gap_reason: row.gap_reason,
}));
}
/**
* Get pricing comparison across all states
*/
async getStatePricingComparison(): Promise<Array<{
state_code: string;
state_name: string;
legal_type: 'recreational' | 'medical_only';
avg_price: number | null;
median_price: number | null;
product_count: number;
vs_national_avg_percent: number | null;
}>> {
const result = await this.pool.query(`
WITH state_prices AS (
SELECT
s.code AS state_code,
s.name AS state_name,
CASE
WHEN s.recreational_legal = TRUE THEN 'recreational'
ELSE 'medical_only'
END AS legal_type,
AVG(sp.price_rec) AS avg_price,
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sp.price_rec) AS median_price,
COUNT(*) AS product_count
FROM states s
JOIN dispensaries d ON d.state_id = s.id
JOIN store_products sp ON sp.dispensary_id = d.id
WHERE sp.price_rec IS NOT NULL
AND sp.is_in_stock = TRUE
AND (s.recreational_legal = TRUE OR s.medical_legal = TRUE)
GROUP BY s.code, s.name, s.recreational_legal
),
national_avg AS (
SELECT AVG(price_rec) AS avg
FROM store_products
WHERE price_rec IS NOT NULL AND is_in_stock = TRUE
)
SELECT
sp.*,
ROUND(((sp.avg_price - na.avg) / NULLIF(na.avg, 0) * 100)::NUMERIC, 2) AS vs_national_avg_percent
FROM state_prices sp, national_avg na
ORDER BY sp.avg_price DESC NULLS LAST
`);
return result.rows.map((row: any) => ({
state_code: row.state_code,
state_name: row.state_name,
legal_type: row.legal_type,
avg_price: row.avg_price ? parseFloat(row.avg_price) : null,
median_price: row.median_price ? parseFloat(row.median_price) : null,
product_count: parseInt(row.product_count) || 0,
vs_national_avg_percent: row.vs_national_avg_percent ? parseFloat(row.vs_national_avg_percent) : null,
}));
}
}
export default StateAnalyticsService;