diff --git a/backend/migrations/099_brand_aliases.sql b/backend/migrations/099_brand_aliases.sql deleted file mode 100644 index 227e10b9..00000000 --- a/backend/migrations/099_brand_aliases.sql +++ /dev/null @@ -1,97 +0,0 @@ --- Migration: Add brand aliases table and brand normalization --- Enables case-insensitive brand matching and alias mapping - --- Create brand_aliases table to map variations to canonical brands -CREATE TABLE IF NOT EXISTS brand_aliases ( - id SERIAL PRIMARY KEY, - brand_id INTEGER NOT NULL REFERENCES brands(id) ON DELETE CASCADE, - alias VARCHAR(255) NOT NULL, -- The raw brand name variation - alias_normalized VARCHAR(255) NOT NULL, -- Lowercase, trimmed for matching - source VARCHAR(50) DEFAULT 'auto', -- 'auto' = auto-detected, 'manual' = user-added - created_at TIMESTAMPTZ DEFAULT NOW(), - - UNIQUE(alias_normalized) -); - --- Index for fast lookups -CREATE INDEX idx_brand_aliases_normalized ON brand_aliases(alias_normalized); -CREATE INDEX idx_brand_aliases_brand_id ON brand_aliases(brand_id); - --- Create function to normalize brand names for matching -CREATE OR REPLACE FUNCTION normalize_brand_name(name TEXT) -RETURNS TEXT AS $$ -BEGIN - RETURN LOWER(TRIM( - -- Remove extra spaces - REGEXP_REPLACE( - -- Remove non-alphanumeric except spaces - REGEXP_REPLACE(name, '[^a-zA-Z0-9\s]', '', 'g'), - '\s+', ' ', 'g' - ) - )); -END; -$$ LANGUAGE plpgsql IMMUTABLE; - --- Create function to find or create brand and return brand_id -CREATE OR REPLACE FUNCTION resolve_brand_id(raw_brand_name TEXT) -RETURNS INTEGER AS $$ -DECLARE - normalized_name TEXT; - found_brand_id INTEGER; -BEGIN - IF raw_brand_name IS NULL OR TRIM(raw_brand_name) = '' THEN - RETURN NULL; - END IF; - - normalized_name := normalize_brand_name(raw_brand_name); - - -- First check aliases - SELECT brand_id INTO found_brand_id - FROM brand_aliases - WHERE alias_normalized = normalized_name; - - IF found_brand_id IS NOT NULL THEN - RETURN found_brand_id; - END IF; - - -- Check brands table directly (case-insensitive) - SELECT id INTO found_brand_id - FROM brands - WHERE normalize_brand_name(name) = normalized_name; - - RETURN found_brand_id; -END; -$$ LANGUAGE plpgsql; - --- Seed brand_aliases with existing brand names -INSERT INTO brand_aliases (brand_id, alias, alias_normalized, source) -SELECT - id, - name, - normalize_brand_name(name), - 'canonical' -FROM brands -ON CONFLICT (alias_normalized) DO NOTHING; - --- Auto-generate aliases from store_products brand variations -INSERT INTO brand_aliases (brand_id, alias, alias_normalized, source) -SELECT DISTINCT ON (normalize_brand_name(sp.brand_name_raw)) - b.id, - sp.brand_name_raw, - normalize_brand_name(sp.brand_name_raw), - 'auto' -FROM store_products sp -CROSS JOIN LATERAL ( - SELECT id, name - FROM brands - WHERE normalize_brand_name(brands.name) = normalize_brand_name(sp.brand_name_raw) - LIMIT 1 -) b -WHERE sp.brand_name_raw IS NOT NULL - AND sp.brand_name_raw != '' -ON CONFLICT (alias_normalized) DO NOTHING; - --- Add comment -COMMENT ON TABLE brand_aliases IS 'Maps brand name variations to canonical brand IDs'; -COMMENT ON FUNCTION normalize_brand_name(TEXT) IS 'Normalizes brand name for case-insensitive matching'; -COMMENT ON FUNCTION resolve_brand_id(TEXT) IS 'Returns brand_id for a raw brand name, using aliases'; diff --git a/backend/migrations/100_backfill_brand_ids.sql b/backend/migrations/100_backfill_brand_ids.sql deleted file mode 100644 index 874b639c..00000000 --- a/backend/migrations/100_backfill_brand_ids.sql +++ /dev/null @@ -1,41 +0,0 @@ --- Migration: Backfill brand_id in store_products using brand aliases --- Links products to canonical brands via case-insensitive matching - --- Update store_products.brand_id using the resolve_brand_id function -UPDATE store_products sp -SET brand_id = resolve_brand_id(sp.brand_name_raw) -WHERE sp.brand_name_raw IS NOT NULL - AND sp.brand_name_raw != '' - AND (sp.brand_id IS NULL OR sp.brand_id != resolve_brand_id(sp.brand_name_raw)); - --- Create trigger to auto-link brand_id on insert/update -CREATE OR REPLACE FUNCTION auto_link_brand_id() -RETURNS TRIGGER AS $$ -BEGIN - IF NEW.brand_name_raw IS NOT NULL AND NEW.brand_name_raw != '' THEN - NEW.brand_id := COALESCE(resolve_brand_id(NEW.brand_name_raw), NEW.brand_id); - END IF; - RETURN NEW; -END; -$$ LANGUAGE plpgsql; - --- Drop trigger if exists and recreate -DROP TRIGGER IF EXISTS trg_auto_link_brand_id ON store_products; -CREATE TRIGGER trg_auto_link_brand_id - BEFORE INSERT OR UPDATE OF brand_name_raw ON store_products - FOR EACH ROW - EXECUTE FUNCTION auto_link_brand_id(); - --- Log the results -DO $$ -DECLARE - linked_count INTEGER; - unlinked_count INTEGER; -BEGIN - SELECT COUNT(*) INTO linked_count FROM store_products WHERE brand_id IS NOT NULL; - SELECT COUNT(*) INTO unlinked_count FROM store_products WHERE brand_id IS NULL AND brand_name_raw IS NOT NULL; - RAISE NOTICE 'Brand linking complete: % products linked, % products unlinked', linked_count, unlinked_count; -END $$; - -COMMENT ON FUNCTION auto_link_brand_id() IS 'Auto-links brand_id when brand_name_raw is set'; -COMMENT ON TRIGGER trg_auto_link_brand_id ON store_products IS 'Auto-links products to canonical brands on insert/update';