--- Slim/Schema.pm.original_1	2014-10-08 19:38:47.000000000 +0100
+++ Slim/Schema.pm	2014-10-08 19:51:36.000000000 +0100
@@ -36,6 +36,7 @@
 use Scalar::Util qw(blessed);
 use Storable;
 use Tie::Cache::LRU::Expires;
+use Unicode::Normalize qw(checkNFC NFC);
 use URI;
 
 use Slim::Formats;
@@ -2629,6 +2630,51 @@
 	}
 
 	if ( !main::SLIM_SERVICE ) {
+		# Bug: XXXX - Normalize contributor, album and track name tags
+		# to Unicode NFC to prevent creating duplicate contributor,
+		# album names etc.
+		#
+		# Some Unicode characters may be represented in more than one
+		# way (typically accented characters also in the ISO 8859-1
+		# set). If/when this occurs, we will end up with duplicated
+		# artist/album references in the DB, according to the
+		# particular sequence of Unicode characters used in a given
+		# tag.
+		#
+		# Normalizing all relevant tags to a single 'canonical' form
+		# avoids this. NFC appears to be the most commonly used form,
+		# so using that form is likely to be most efficient.
+		for my $tag (Slim::Schema::Contributor->contributorRoles, qw(
+			ARTISTSORT ALBUMARTISTSORT
+			ALBUM ALBUMSORT
+			TITLE TITLESORT
+		)) {
+			next unless defined $attributes->{$tag};
+
+			# A tag value is handled either as an array ref of strings or
+			# as a single string. foreach aliases $value to each element,
+			# so assigning to it updates $attributes in place.
+			for my $value (
+				ref($attributes->{$tag}) eq 'ARRAY'
+					? @{ $attributes->{$tag} }
+					: $attributes->{$tag}
+			) {
+				next unless defined $value;
+
+				# checkNFC is the quick check and should be cheaper than
+				# NFC, so use it to skip strings that are certainly
+				# normalized already. It returns undef for "maybe", so
+				# compare against NFC's result to avoid logging and
+				# rewriting values that do not actually change.
+				next if checkNFC($value);
+
+				my $normalized = NFC($value);
+				next if $normalized eq $value;
+
+				$log->info("Normalizing to NFC: $tag '$value' $url");
+				$value = $normalized;
+			}
+		}
+
 		# The ARTISTSORT and ALBUMARTISTSORT tags are normalized in Contributor->add()
 		# since the tag may need to be split. See bugs #295 and #4584.
 		#