Hi,

In basic regular expressions, range expressions are not safe to use outside the C locale; the results vary between implementations and locales.
- For 'grep' this was explained in
  https://lists.gnu.org/archive/html/bug-grep/2011-06/msg00031.html
  https://lists.gnu.org/archive/html/bug-grep/2012-01/msg00088.html
  http://savannah.gnu.org/bugs/?32337

- For 'tr' I'm seeing this on Solaris 11 2011-11 in de_DE.UTF-8 locale:

  $ echo abcdefghijklmnopqrstuvwxyz | /usr/bin/tr 'a-z' 'A-Z'
  AbcdefghijklmnopqrstuvwxyZ
  $ echo abcdefghijklmnopqrstuvwxyz | /usr/bin/tr '[a-z]' '[A-Z]'
  ABCDEFGHIJKLMNOPQRSTUVWXYZ
  $ echo abcdefghijklmnopqrstuvwxyz | /usr/xpg4/bin/tr 'a-z' 'A-Z'
  ABⓒ𝚍ⓔFGH𝙞JK𝚕ⓜNOPQⓡSTUⓥWⓧYZ
  $ echo abcdefghijklmnopqrstuvwxyz | /usr/xpg4/bin/tr '[a-z]' '[A-Z]'
  ABⓒ𝚍ⓔFGH𝙞JK𝚕ⓜNOPQⓡSTUⓥWⓧYZ
  $ echo abcdefghijklmnopqrstuvwxyz | /usr/xpg6/bin/tr 'a-z' 'A-Z'
  ABⓒ𝚍ⓔFGH𝙞JK𝚕ⓜNOPQⓡSTUⓥWⓧYZ
  $ echo abcdefghijklmnopqrstuvwxyz | /usr/xpg6/bin/tr '[a-z]' '[A-Z]'
  ABⓒ𝚍ⓔFGH𝙞JK𝚕ⓜNOPQⓡSTUⓥWⓧYZ

  Whereas in C locale:

  $ echo abcdefghijklmnopqrstuvwxyz | LC_ALL=C /usr/bin/tr 'a-z' 'A-Z'
  AbcdefghijklmnopqrstuvwxyZ
  $ echo abcdefghijklmnopqrstuvwxyz | LC_ALL=C /usr/bin/tr '[a-z]' '[A-Z]'
  ABCDEFGHIJKLMNOPQRSTUVWXYZ
  $ echo abcdefghijklmnopqrstuvwxyz | LC_ALL=C /usr/xpg4/bin/tr 'a-z' 'A-Z'
  ABCDEFGHIJKLMNOPQRSTUVWXYZ
  $ echo abcdefghijklmnopqrstuvwxyz | LC_ALL=C /usr/xpg4/bin/tr '[a-z]' '[A-Z]'
  ABCDEFGHIJKLMNOPQRSTUVWXYZ
  $ echo abcdefghijklmnopqrstuvwxyz | LC_ALL=C /usr/xpg6/bin/tr 'a-z' 'A-Z'
  ABCDEFGHIJKLMNOPQRSTUVWXYZ
  $ echo abcdefghijklmnopqrstuvwxyz | LC_ALL=C /usr/xpg6/bin/tr '[a-z]' '[A-Z]'
  ABCDEFGHIJKLMNOPQRSTUVWXYZ

Gnulib uses this idiom in these files:

  tests/test-pipe-filter-gi1.c:29:/* Pipe a text file through 'tr "[a-z]" "[A-Z]"', which converts ASCII
  tests/test-pipe-filter-gi1.c:100: argv[2] = "[A-Z]";
  tests/test-pipe-filter-ii1.c:29:/* Pipe a text file through 'tr "[a-z]" "[A-Z]"', which converts ASCII
  tests/test-pipe-filter-ii1.c:123: argv[2] = "[A-Z]";

Fixed through the patch below.
  build-aux/bootstrap:426: appvar=`echo $app | tr '[a-z]-' '[A-Z]_'`
  m4/host-os.m4:69: expr "X$host_os" : 'X\([A-Za-z]\)' | tr '[a-z]' '[A-Z]'
  m4/fnmatch.m4:24: echo $gl_fnmatch_required | tr '[[A-Z]]' '[[a-z]]'

Although in these situations LC_ALL is already set to C, it's useful for consistency between these scripts and other scripts to systematically set LC_ALL=C before such 'tr' commands.

  top/maint.mk:347: | grep -E '"[A-Z]' \

Is this code meant to catch non-ASCII uppercase error messages as well?


2012-02-19 Bruno Haible <br...@clisp.org>

        Fix test failure in many locales on Solaris 11.
        * tests/test-pipe-filter-gi1.c (main): Don't use range expression in 'tr' arguments.
        * tests/test-pipe-filter-ii1.c (main): Likewise.
        * build-aux/bootstrap (check_versions): Run 'tr' command with range expressions in the C locale.
        * m4/fnmatch.m4 (gl_FUNC_FNMATCH_POSIX): Likewise.
        * m4/host-os.m4 (gl_HOST_OS): Likewise.

--- build-aux/bootstrap.orig Sun Feb 19 15:02:49 2012
+++ build-aux/bootstrap Sun Feb 19 14:53:08 2012
@@ -423,7 +423,7 @@
       $use_git || continue
     fi
     # Honor $APP variables ($TAR, $AUTOCONF, etc.)
-    appvar=`echo $app | tr '[a-z]-' '[A-Z]_'`
+    appvar=`echo $app | LC_ALL=C tr '[a-z]-' '[A-Z]_'`
     test "$appvar" = TAR && appvar=AMTAR
     case $appvar in
         GZIP) ;; # Do not use $GZIP: it contains gzip options.
--- m4/fnmatch.m4.orig Sun Feb 19 15:02:49 2012
+++ m4/fnmatch.m4 Sun Feb 19 14:53:33 2012
@@ -1,4 +1,4 @@
-# Check for fnmatch - serial 8.
+# Check for fnmatch - serial 9.
 
 # Copyright (C) 2000-2007, 2009-2012 Free Software Foundation, Inc.
 # This file is free software; the Free Software Foundation
@@ -21,7 +21,7 @@
 
   FNMATCH_H=
   gl_fnmatch_required_lowercase=`
-    echo $gl_fnmatch_required | tr '[[A-Z]]' '[[a-z]]'
+    echo $gl_fnmatch_required | LC_ALL=C tr '[[A-Z]]' '[[a-z]]'
   `
   gl_fnmatch_cache_var="gl_cv_func_fnmatch_${gl_fnmatch_required_lowercase}"
   AC_CACHE_CHECK([for working $gl_fnmatch_required fnmatch],
--- m4/host-os.m4.orig Sun Feb 19 15:02:49 2012
+++ m4/host-os.m4 Sun Feb 19 14:54:02 2012
@@ -1,4 +1,4 @@
-# serial 8
+# serial 9
 
 # Copyright (C) 2001, 2003-2004, 2006, 2009-2012 Free Software Foundation, Inc.
 # This file is free software; the Free Software Foundation
@@ -66,7 +66,7 @@
       # from $host_os, but capitalizes its first letter.
       [A-Za-z]*)
         os=`
-          expr "X$host_os" : 'X\([A-Za-z]\)' | tr '[a-z]' '[A-Z]'
+          expr "X$host_os" : 'X\([A-Za-z]\)' | LC_ALL=C tr '[a-z]' '[A-Z]'
         ``
           expr "X$host_os" : 'X.\([A-Za-z]*\)'
         `
--- tests/test-pipe-filter-gi1.c.orig Sun Feb 19 15:02:49 2012
+++ tests/test-pipe-filter-gi1.c Sun Feb 19 15:02:25 2012
@@ -26,8 +26,9 @@
 
 #include "macros.h"
 
-/* Pipe a text file through 'tr "[a-z]" "[A-Z]"', which converts ASCII
-   characters from lower case to upper case. */
+/* Pipe a text file through 'LC_ALL=C tr "[a-z]" "[A-Z]"', or equivalently,
+   'tr "abcdefghijklmnopqrstuvwxyz" "ABCDEFGHIJKLMNOPQRSTUVWXYZ"', which
+   converts ASCII characters from lower case to upper case. */
 
 struct locals
 {
@@ -96,8 +97,8 @@
   l.nread = 0;
 
   argv[0] = tr_program;
-  argv[1] = "[a-z]";
-  argv[2] = "[A-Z]";
+  argv[1] = "abcdefghijklmnopqrstuvwxyz";
+  argv[2] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
   argv[3] = NULL;
 
   f = pipe_filter_gi_create ("tr", tr_program, argv, false, true,
--- tests/test-pipe-filter-ii1.c.orig Sun Feb 19 15:02:49 2012
+++ tests/test-pipe-filter-ii1.c Sun Feb 19 15:02:33 2012
@@ -26,8 +26,9 @@
 
 #include "macros.h"
 
-/* Pipe a text file through 'tr "[a-z]" "[A-Z]"', which converts ASCII
-   characters from lower case to upper case. */
+/* Pipe a text file through 'LC_ALL=C tr "[a-z]" "[A-Z]"', or equivalently,
+   'tr "abcdefghijklmnopqrstuvwxyz" "ABCDEFGHIJKLMNOPQRSTUVWXYZ"', which
+   converts ASCII characters from lower case to upper case. */
 
 struct locals
 {
@@ -119,8 +120,8 @@
   l.nread = 0;
 
   argv[0] = tr_program;
-  argv[1] = "[a-z]";
-  argv[2] = "[A-Z]";
+  argv[1] = "abcdefghijklmnopqrstuvwxyz";
+  argv[2] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
   argv[3] = NULL;
 
   result = pipe_filter_ii_execute ("tr", tr_program, argv, false, true,