Files
cannaiq/backend/migrations/099_brand_aliases.sql
Kelly e826a4dd3e fix: Add consecutive_failures column to migration
🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-13 01:20:59 -07:00

98 lines
3.1 KiB
PL/PgSQL

-- Migration: Add brand aliases table and brand normalization
-- Enables case-insensitive brand matching and alias mapping
-- Create brand_aliases table to map variations to canonical brands
CREATE TABLE IF NOT EXISTS brand_aliases (
id SERIAL PRIMARY KEY,
brand_id INTEGER NOT NULL REFERENCES brands(id) ON DELETE CASCADE,
alias VARCHAR(255) NOT NULL, -- The raw brand name variation
alias_normalized VARCHAR(255) NOT NULL, -- Lowercase, trimmed for matching
source VARCHAR(50) DEFAULT 'auto', -- 'auto' = auto-detected, 'manual' = user-added
created_at TIMESTAMPTZ DEFAULT NOW(),
UNIQUE(alias_normalized)
);
-- Index for fast lookups
CREATE INDEX idx_brand_aliases_normalized ON brand_aliases(alias_normalized);
CREATE INDEX idx_brand_aliases_brand_id ON brand_aliases(brand_id);
-- Create function to normalize brand names for matching
CREATE OR REPLACE FUNCTION normalize_brand_name(name TEXT)
RETURNS TEXT AS $$
BEGIN
RETURN LOWER(TRIM(
-- Remove extra spaces
REGEXP_REPLACE(
-- Remove non-alphanumeric except spaces
REGEXP_REPLACE(name, '[^a-zA-Z0-9\s]', '', 'g'),
'\s+', ' ', 'g'
)
));
END;
$$ LANGUAGE plpgsql IMMUTABLE;
-- Create function to find or create brand and return brand_id
CREATE OR REPLACE FUNCTION resolve_brand_id(raw_brand_name TEXT)
RETURNS INTEGER AS $$
DECLARE
normalized_name TEXT;
found_brand_id INTEGER;
BEGIN
IF raw_brand_name IS NULL OR TRIM(raw_brand_name) = '' THEN
RETURN NULL;
END IF;
normalized_name := normalize_brand_name(raw_brand_name);
-- First check aliases
SELECT brand_id INTO found_brand_id
FROM brand_aliases
WHERE alias_normalized = normalized_name;
IF found_brand_id IS NOT NULL THEN
RETURN found_brand_id;
END IF;
-- Check brands table directly (case-insensitive)
SELECT id INTO found_brand_id
FROM brands
WHERE normalize_brand_name(name) = normalized_name;
RETURN found_brand_id;
END;
$$ LANGUAGE plpgsql;
-- Seed brand_aliases with existing brand names
INSERT INTO brand_aliases (brand_id, alias, alias_normalized, source)
SELECT
id,
name,
normalize_brand_name(name),
'canonical'
FROM brands
ON CONFLICT (alias_normalized) DO NOTHING;
-- Auto-generate aliases from store_products brand variations
INSERT INTO brand_aliases (brand_id, alias, alias_normalized, source)
SELECT DISTINCT ON (normalize_brand_name(sp.brand_name_raw))
b.id,
sp.brand_name_raw,
normalize_brand_name(sp.brand_name_raw),
'auto'
FROM store_products sp
CROSS JOIN LATERAL (
SELECT id, name
FROM brands
WHERE normalize_brand_name(brands.name) = normalize_brand_name(sp.brand_name_raw)
LIMIT 1
) b
WHERE sp.brand_name_raw IS NOT NULL
AND sp.brand_name_raw != ''
ON CONFLICT (alias_normalized) DO NOTHING;
-- Add comment
COMMENT ON TABLE brand_aliases IS 'Maps brand name variations to canonical brand IDs';
COMMENT ON FUNCTION normalize_brand_name(TEXT) IS 'Normalizes brand name for case-insensitive matching';
COMMENT ON FUNCTION resolve_brand_id(TEXT) IS 'Returns brand_id for a raw brand name, using aliases';