First, it is stupid to complain about names which are valid. It is also stupid to handle each such occurrence individually as it comes up. As the pages are all UTF-8 now, there is no need to keep such references; this patch restores the original characters in place of the numeric character references.
patch below: Index: english/international/l10n/scripts/gen-files.pl =================================================================== --- english/international/l10n/scripts/gen-files.pl (revision 232) +++ english/international/l10n/scripts/gen-files.pl (working copy) @@ -3,6 +3,7 @@ use strict; use File::Path; use Getopt::Long; +use Encode qw(encode); use lib ($0 =~ m|(.*)/|, $1 or ".") ."/../../../../Perl"; @@ -117,8 +118,7 @@ $name =~ s/\s*<.*//; $name =~ s/&(?!#)/&amp;/g; $name =~ s/=\?.*?\?=//g; - # BREAK PERMITTED HERE (U+0082) is not allowed in HTML 4.01. - $name =~ s/(?:&#0*130;|&#x0*82;|\N{U+0082})//ig; + $name =~ s/&#(\d+);/encode("UTF-8",chr($1))/ge; $name = 'DDTP' if $name eq 'Debian Description Translation Project'; $name = '' if $name =~ m/\@/; return $name; -- victory no need to CC me :-)