/** * StateAnalyticsService * * Analytics for state-level market data and comparisons. * * Data Sources: * - states: Legal status, year of legalization * - dispensaries: Store counts by state * - store_products: Product/brand coverage by state * - store_product_snapshots: Historical data depth * * Key Metrics: * - Legal state breakdown (rec, med-only, illegal) * - Coverage by state (dispensaries, products, brands) * - Rec vs Med price comparisons * - Data freshness per state */ import { Pool } from 'pg'; import { StateMarketSummary, LegalStateBreakdown, RecVsMedPriceComparison, LegalType, getLegalTypeFilter, } from './types'; export class StateAnalyticsService { constructor(private pool: Pool) {} // ============================================================ // HELPER METHODS FOR LEGAL TYPE FILTERING // ============================================================ /** * Get recreational-only state codes */ async getRecreationalStates(): Promise { const result = await this.pool.query(` SELECT code FROM states WHERE recreational_legal = TRUE ORDER BY code `); return result.rows.map((r: any) => r.code); } /** * Get medical-only state codes (not recreational) */ async getMedicalOnlyStates(): Promise { const result = await this.pool.query(` SELECT code FROM states WHERE medical_legal = TRUE AND (recreational_legal = FALSE OR recreational_legal IS NULL) ORDER BY code `); return result.rows.map((r: any) => r.code); } /** * Get no-program state codes */ async getNoProgramStates(): Promise { const result = await this.pool.query(` SELECT code FROM states WHERE (recreational_legal = FALSE OR recreational_legal IS NULL) AND (medical_legal = FALSE OR medical_legal IS NULL) ORDER BY code `); return result.rows.map((r: any) => r.code); } /** * Get state IDs by legal type for use in subqueries */ async getStateIdsByLegalType(legalType: LegalType): Promise { const filter = getLegalTypeFilter(legalType); const result = await this.pool.query(` SELECT s.id FROM states s WHERE ${filter} ORDER BY s.id `); return result.rows.map((r: any) => r.id); } /** * Get market summary for a specific state */ async getStateMarketSummary(stateCode: string): Promise { // Get state info const stateResult = await this.pool.query(` SELECT s.id, s.code, s.name, s.recreational_legal, s.rec_year, s.medical_legal, s.med_year FROM states s WHERE s.code = $1 `, [stateCode]); if (stateResult.rows.length === 0) { return null; } const state = stateResult.rows[0]; // Get coverage metrics const coverageResult = await this.pool.query(` SELECT COUNT(DISTINCT d.id) AS dispensary_count, COUNT(DISTINCT sp.id) AS product_count, COUNT(DISTINCT sp.brand_name_raw) FILTER (WHERE sp.brand_name_raw IS NOT NULL) AS brand_count, COUNT(DISTINCT sp.category_raw) FILTER (WHERE sp.category_raw IS NOT NULL) AS category_count, COUNT(sps.id) AS snapshot_count, MAX(sps.captured_at) AS last_crawl_at FROM states s LEFT JOIN dispensaries d ON d.state_id = s.id LEFT JOIN store_products sp ON sp.dispensary_id = d.id AND sp.is_in_stock = TRUE LEFT JOIN store_product_snapshots sps ON sps.dispensary_id = d.id WHERE s.code = $1 `, [stateCode]); const coverage = coverageResult.rows[0]; // Get pricing metrics const pricingResult = await this.pool.query(` SELECT AVG(price_rec) AS avg_price, PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY price_rec) AS median_price, MIN(price_rec) AS min_price, MAX(price_rec) AS max_price FROM store_products sp JOIN dispensaries d ON d.id = sp.dispensary_id JOIN states s ON s.id = d.state_id WHERE s.code = $1 AND sp.price_rec IS NOT NULL AND sp.is_in_stock = TRUE `, [stateCode]); const pricing = pricingResult.rows[0]; // Get top categories const topCategoriesResult = await this.pool.query(` SELECT sp.category_raw, COUNT(*) AS count FROM store_products sp JOIN dispensaries d ON d.id = sp.dispensary_id JOIN states s ON s.id = d.state_id WHERE s.code = $1 AND sp.category_raw IS NOT NULL AND sp.is_in_stock = TRUE GROUP BY sp.category_raw ORDER BY count DESC LIMIT 10 `, [stateCode]); // Get top brands const topBrandsResult = await this.pool.query(` SELECT sp.brand_name_raw AS brand, COUNT(*) AS count FROM store_products sp JOIN dispensaries d ON d.id = sp.dispensary_id JOIN states s ON s.id = d.state_id WHERE s.code = $1 AND sp.brand_name_raw IS NOT NULL AND sp.is_in_stock = TRUE GROUP BY sp.brand_name_raw ORDER BY count DESC LIMIT 10 `, [stateCode]); return { state_code: state.code, state_name: state.name, legal_status: { recreational_legal: state.recreational_legal || false, rec_year: state.rec_year, medical_legal: state.medical_legal || false, med_year: state.med_year, }, coverage: { dispensary_count: parseInt(coverage.dispensary_count) || 0, product_count: parseInt(coverage.product_count) || 0, brand_count: parseInt(coverage.brand_count) || 0, category_count: parseInt(coverage.category_count) || 0, snapshot_count: parseInt(coverage.snapshot_count) || 0, last_crawl_at: coverage.last_crawl_at ? coverage.last_crawl_at.toISOString() : null, }, pricing: { avg_price: pricing.avg_price ? parseFloat(pricing.avg_price) : null, median_price: pricing.median_price ? parseFloat(pricing.median_price) : null, min_price: pricing.min_price ? parseFloat(pricing.min_price) : null, max_price: pricing.max_price ? parseFloat(pricing.max_price) : null, }, top_categories: topCategoriesResult.rows.map((row: any) => ({ category: row.category_raw, count: parseInt(row.count), })), top_brands: topBrandsResult.rows.map((row: any) => ({ brand: row.brand, count: parseInt(row.count), })), }; } /** * Get breakdown by legal status (rec, med-only, no program) */ async getLegalStateBreakdown(): Promise { // Get recreational states const recResult = await this.pool.query(` SELECT s.code, s.name, COUNT(DISTINCT d.id) AS dispensary_count, COUNT(DISTINCT sp.id) AS product_count, COUNT(sps.id) AS snapshot_count FROM states s LEFT JOIN dispensaries d ON d.state_id = s.id LEFT JOIN store_products sp ON sp.dispensary_id = d.id AND sp.is_in_stock = TRUE LEFT JOIN store_product_snapshots sps ON sps.dispensary_id = d.id WHERE s.recreational_legal = TRUE GROUP BY s.code, s.name ORDER BY dispensary_count DESC `); // Get medical-only states const medResult = await this.pool.query(` SELECT s.code, s.name, COUNT(DISTINCT d.id) AS dispensary_count, COUNT(DISTINCT sp.id) AS product_count, COUNT(sps.id) AS snapshot_count FROM states s LEFT JOIN dispensaries d ON d.state_id = s.id LEFT JOIN store_products sp ON sp.dispensary_id = d.id AND sp.is_in_stock = TRUE LEFT JOIN store_product_snapshots sps ON sps.dispensary_id = d.id WHERE s.medical_legal = TRUE AND (s.recreational_legal = FALSE OR s.recreational_legal IS NULL) GROUP BY s.code, s.name ORDER BY dispensary_count DESC `); // Get no-program states const noProgResult = await this.pool.query(` SELECT s.code, s.name FROM states s WHERE (s.recreational_legal = FALSE OR s.recreational_legal IS NULL) AND (s.medical_legal = FALSE OR s.medical_legal IS NULL) ORDER BY s.name `); const recStates = recResult.rows; const medStates = medResult.rows; const noProgStates = noProgResult.rows; return { recreational_states: { count: recStates.length, dispensary_count: recStates.reduce((sum, s) => sum + parseInt(s.dispensary_count), 0), product_count: recStates.reduce((sum, s) => sum + parseInt(s.product_count), 0), snapshot_count: recStates.reduce((sum, s) => sum + parseInt(s.snapshot_count), 0), states: recStates.map((row: any) => ({ code: row.code, name: row.name, dispensary_count: parseInt(row.dispensary_count), })), }, medical_only_states: { count: medStates.length, dispensary_count: medStates.reduce((sum, s) => sum + parseInt(s.dispensary_count), 0), product_count: medStates.reduce((sum, s) => sum + parseInt(s.product_count), 0), snapshot_count: medStates.reduce((sum, s) => sum + parseInt(s.snapshot_count), 0), states: medStates.map((row: any) => ({ code: row.code, name: row.name, dispensary_count: parseInt(row.dispensary_count), })), }, no_program_states: { count: noProgStates.length, states: noProgStates.map((row: any) => ({ code: row.code, name: row.name, })), }, }; } /** * Get rec vs med price comparison (overall or by category) */ async getRecVsMedPriceComparison(category?: string): Promise { const params: any[] = []; let categoryFilter = ''; let groupBy = 'NULL'; if (category) { categoryFilter = 'AND sp.category_raw = $1'; params.push(category); groupBy = 'sp.category_raw'; } else { groupBy = 'sp.category_raw'; } const result = await this.pool.query(` WITH rec_prices AS ( SELECT ${category ? 'sp.category_raw' : 'sp.category_raw'}, COUNT(DISTINCT s.code) AS state_count, COUNT(*) AS product_count, AVG(sp.price_rec) AS avg_price, PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sp.price_rec) AS median_price FROM store_products sp JOIN dispensaries d ON d.id = sp.dispensary_id JOIN states s ON s.id = d.state_id WHERE s.recreational_legal = TRUE AND sp.price_rec IS NOT NULL AND sp.is_in_stock = TRUE AND sp.category_raw IS NOT NULL ${categoryFilter} GROUP BY sp.category_raw ), med_prices AS ( SELECT ${category ? 'sp.category_raw' : 'sp.category_raw'}, COUNT(DISTINCT s.code) AS state_count, COUNT(*) AS product_count, AVG(sp.price_rec) AS avg_price, PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sp.price_rec) AS median_price FROM store_products sp JOIN dispensaries d ON d.id = sp.dispensary_id JOIN states s ON s.id = d.state_id WHERE s.medical_legal = TRUE AND (s.recreational_legal = FALSE OR s.recreational_legal IS NULL) AND sp.price_rec IS NOT NULL AND sp.is_in_stock = TRUE AND sp.category_raw IS NOT NULL ${categoryFilter} GROUP BY sp.category_raw ) SELECT COALESCE(r.category, m.category) AS category, r.state_count AS rec_state_count, r.product_count AS rec_product_count, r.avg_price AS rec_avg_price, r.median_price AS rec_median_price, m.state_count AS med_state_count, m.product_count AS med_product_count, m.avg_price AS med_avg_price, m.median_price AS med_median_price, CASE WHEN r.avg_price IS NOT NULL AND m.avg_price IS NOT NULL THEN ROUND(((r.avg_price - m.avg_price) / NULLIF(m.avg_price, 0) * 100)::NUMERIC, 2) ELSE NULL END AS price_diff_percent FROM rec_prices r FULL OUTER JOIN med_prices m ON r.category = m.category ORDER BY COALESCE(r.product_count, 0) + COALESCE(m.product_count, 0) DESC `, params); return result.rows.map((row: any) => ({ category: row.category_raw, recreational: { state_count: parseInt(row.rec_state_count) || 0, product_count: parseInt(row.rec_product_count) || 0, avg_price: row.rec_avg_price ? parseFloat(row.rec_avg_price) : null, median_price: row.rec_median_price ? parseFloat(row.rec_median_price) : null, }, medical_only: { state_count: parseInt(row.med_state_count) || 0, product_count: parseInt(row.med_product_count) || 0, avg_price: row.med_avg_price ? parseFloat(row.med_avg_price) : null, median_price: row.med_median_price ? parseFloat(row.med_median_price) : null, }, price_diff_percent: row.price_diff_percent ? parseFloat(row.price_diff_percent) : null, })); } /** * Get all states with coverage metrics */ async getAllStatesWithCoverage(): Promise> { const result = await this.pool.query(` SELECT s.code AS state_code, s.name AS state_name, COALESCE(s.recreational_legal, FALSE) AS recreational_legal, COALESCE(s.medical_legal, FALSE) AS medical_legal, COUNT(DISTINCT d.id) AS dispensary_count, COUNT(DISTINCT sp.id) AS product_count, COUNT(DISTINCT sp.brand_name_raw) FILTER (WHERE sp.brand_name_raw IS NOT NULL) AS brand_count, MAX(sps.captured_at) AS last_crawl_at FROM states s LEFT JOIN dispensaries d ON d.state_id = s.id LEFT JOIN store_products sp ON sp.dispensary_id = d.id AND sp.is_in_stock = TRUE LEFT JOIN store_product_snapshots sps ON sps.dispensary_id = d.id GROUP BY s.code, s.name, s.recreational_legal, s.medical_legal ORDER BY dispensary_count DESC, s.name `); return result.rows.map((row: any) => ({ state_code: row.state_code, state_name: row.state_name, recreational_legal: row.recreational_legal, medical_legal: row.medical_legal, dispensary_count: parseInt(row.dispensary_count) || 0, product_count: parseInt(row.product_count) || 0, brand_count: parseInt(row.brand_count) || 0, last_crawl_at: row.last_crawl_at ? row.last_crawl_at.toISOString() : null, })); } /** * Get state coverage gaps (legal states with low/no coverage) */ async getStateCoverageGaps(): Promise> { const result = await this.pool.query(` SELECT s.code AS state_code, s.name AS state_name, CASE WHEN s.recreational_legal = TRUE THEN 'recreational' ELSE 'medical_only' END AS legal_type, COUNT(DISTINCT d.id) AS dispensary_count, CASE WHEN COUNT(DISTINCT d.id) = 0 THEN TRUE WHEN COUNT(DISTINCT sp.id) = 0 THEN TRUE WHEN MAX(sps.captured_at) < NOW() - INTERVAL '7 days' THEN TRUE ELSE FALSE END AS has_gap, CASE WHEN COUNT(DISTINCT d.id) = 0 THEN 'No dispensaries' WHEN COUNT(DISTINCT sp.id) = 0 THEN 'No products' WHEN MAX(sps.captured_at) < NOW() - INTERVAL '7 days' THEN 'Stale data (>7 days)' ELSE 'Good coverage' END AS gap_reason FROM states s LEFT JOIN dispensaries d ON d.state_id = s.id LEFT JOIN store_products sp ON sp.dispensary_id = d.id AND sp.is_in_stock = TRUE LEFT JOIN store_product_snapshots sps ON sps.dispensary_id = d.id WHERE s.recreational_legal = TRUE OR s.medical_legal = TRUE GROUP BY s.code, s.name, s.recreational_legal, s.medical_legal HAVING COUNT(DISTINCT d.id) = 0 OR COUNT(DISTINCT sp.id) = 0 OR MAX(sps.captured_at) IS NULL OR MAX(sps.captured_at) < NOW() - INTERVAL '7 days' ORDER BY CASE WHEN s.recreational_legal = TRUE THEN 0 ELSE 1 END, dispensary_count DESC `); return result.rows.map((row: any) => ({ state_code: row.state_code, state_name: row.state_name, legal_type: row.legal_type, dispensary_count: parseInt(row.dispensary_count) || 0, has_gap: row.has_gap, gap_reason: row.gap_reason, })); } /** * Get pricing comparison across all states */ async getStatePricingComparison(): Promise> { const result = await this.pool.query(` WITH state_prices AS ( SELECT s.code AS state_code, s.name AS state_name, CASE WHEN s.recreational_legal = TRUE THEN 'recreational' ELSE 'medical_only' END AS legal_type, AVG(sp.price_rec) AS avg_price, PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sp.price_rec) AS median_price, COUNT(*) AS product_count FROM states s JOIN dispensaries d ON d.state_id = s.id JOIN store_products sp ON sp.dispensary_id = d.id WHERE sp.price_rec IS NOT NULL AND sp.is_in_stock = TRUE AND (s.recreational_legal = TRUE OR s.medical_legal = TRUE) GROUP BY s.code, s.name, s.recreational_legal ), national_avg AS ( SELECT AVG(price_rec) AS avg FROM store_products WHERE price_rec IS NOT NULL AND is_in_stock = TRUE ) SELECT sp.*, ROUND(((sp.avg_price - na.avg) / NULLIF(na.avg, 0) * 100)::NUMERIC, 2) AS vs_national_avg_percent FROM state_prices sp, national_avg na ORDER BY sp.avg_price DESC NULLS LAST `); return result.rows.map((row: any) => ({ state_code: row.state_code, state_name: row.state_name, legal_type: row.legal_type, avg_price: row.avg_price ? parseFloat(row.avg_price) : null, median_price: row.median_price ? parseFloat(row.median_price) : null, product_count: parseInt(row.product_count) || 0, vs_national_avg_percent: row.vs_national_avg_percent ? parseFloat(row.vs_national_avg_percent) : null, })); } } export default StateAnalyticsService;