This creates a module for the function mbscasecmp(), a variant of strcasecmp() that works with multibyte strings.
The module strcase now NO LONGER takes care of providing an internationalized strcasecmp()!! It only provides a replacement for platforms which don't have this function. 2007-02-04 Bruno Haible <[EMAIL PROTECTED]> New module mbscasecmp, reduced goal of strcasecmp. * modules/mbscasecmp: New file. * lib/mbscasecmp.c: New file, copied from lib/strcasecmp.c. (mbscasecmp): Renamed from strcasecmp. * lib/strcasecmp.c: Don't include mbuiter.h. (strcasecmp): Remove support for multibyte locales. * lib/string_.h (strcasecmp): Don't rename. Declare only if missing. Change the conditional link warning. (mbscasecmp): New declaration. * m4/mbscasecmp.m4: New file. * m4/string_h.m4 (gl_STRING_MODULE_INDICATOR_DEFAULTS): Initialize GNULIB_MBSCASECMP. * modules/string (string.h): Also substitute GNULIB_MBSCASECMP. * MODULES.html.sh (Internationalization functions): Add mbscasecmp. ========================== modules/mbscasecmp ============================== Description: mbscasecmp() function: case-insensitive string comparison. Files: lib/mbscasecmp.c m4/mbscasecmp.m4 m4/mbrtowc.m4 Depends-on: mbuiter string configure.ac: gl_FUNC_MBSCASECMP gl_STRING_MODULE_INDICATOR([mbscasecmp]) Makefile.am: lib_SOURCES += mbscasecmp.c Include: <string.h> License: LGPL Maintainer: Bruno Haible ============================= m4/mbscasecmp.m4 ============================= # mbscasecmp.m4 serial 1 dnl Copyright (C) 2007 Free Software Foundation, Inc. dnl This file is free software; the Free Software Foundation dnl gives unlimited permission to copy and/or distribute it, dnl with or without modifications, as long as this notice is preserved. AC_DEFUN([gl_FUNC_MBSCASECMP], [ gl_PREREQ_MBSCASECMP ]) # Prerequisites of lib/mbscasecmp.c. 
AC_DEFUN([gl_PREREQ_MBSCASECMP], [ AC_REQUIRE([gl_FUNC_MBRTOWC]) : ]) ============================================================================ --- MODULES.html.sh 5 Feb 2007 01:36:34 -0000 1.183 +++ MODULES.html.sh 5 Feb 2007 01:52:10 -0000 @@ -2163,6 +2163,7 @@ func_module mbschr func_module mbsrchr func_module mbsstr + func_module mbscasecmp func_module mbswidth func_module memcasecmp func_module memcoll --- lib/mbscasecmp.c 5 Feb 2007 01:40:45 -0000 1.1 +++ lib/mbscasecmp.c 5 Feb 2007 01:52:10 -0000 @@ -31,13 +31,13 @@ #define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch)) -/* Compare strings S1 and S2, ignoring case, returning less than, equal to or - greater than zero if S1 is lexicographically less than, equal to or greater - than S2. +/* Compare the character strings S1 and S2, ignoring case, returning less than, + equal to or greater than zero if S1 is lexicographically less than, equal to + or greater than S2. Note: This function may, in multibyte locales, return 0 for strings of different lengths! */ int -strcasecmp (const char *s1, const char *s2) +mbscasecmp (const char *s1, const char *s2) { if (s1 == s2) return 0; --- lib/strcasecmp.c 26 Jan 2007 22:16:55 -0000 1.13 +++ lib/strcasecmp.c 5 Feb 2007 01:52:11 -0000 @@ -1,7 +1,5 @@ /* Case-insensitive string comparison function. Copyright (C) 1998-1999, 2005-2007 Free Software Foundation, Inc. - Written by Bruno Haible <[EMAIL PROTECTED]>, 2005, - based on earlier glibc code. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -25,79 +23,41 @@ #include <ctype.h> #include <limits.h> -#if HAVE_MBRTOWC -# include "mbuiter.h" -#endif - #define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch)) /* Compare strings S1 and S2, ignoring case, returning less than, equal to or greater than zero if S1 is lexicographically less than, equal to or greater than S2. 
- Note: This function may, in multibyte locales, return 0 for strings of - different lengths! */ + Note: This function does not work with multibyte strings! */ + int strcasecmp (const char *s1, const char *s2) { - if (s1 == s2) + const unsigned char *p1 = (const unsigned char *) s1; + const unsigned char *p2 = (const unsigned char *) s2; + unsigned char c1, c2; + + if (p1 == p2) return 0; - /* Be careful not to look at the entire extent of s1 or s2 until needed. - This is useful because when two strings differ, the difference is - most often already in the very few first characters. */ -#if HAVE_MBRTOWC - if (MB_CUR_MAX > 1) + do { - mbui_iterator_t iter1; - mbui_iterator_t iter2; + c1 = TOLOWER (*p1); + c2 = TOLOWER (*p2); - mbui_init (iter1, s1); - mbui_init (iter2, s2); + if (c1 == '\0') + break; - while (mbui_avail (iter1) && mbui_avail (iter2)) - { - int cmp = mb_casecmp (mbui_cur (iter1), mbui_cur (iter2)); - - if (cmp != 0) - return cmp; - - mbui_advance (iter1); - mbui_advance (iter2); - } - if (mbui_avail (iter1)) - /* s2 terminated before s1. */ - return 1; - if (mbui_avail (iter2)) - /* s1 terminated before s2. */ - return -1; - return 0; + ++p1; + ++p2; } + while (c1 == c2); + + if (UCHAR_MAX <= INT_MAX) + return c1 - c2; else -#endif - { - const unsigned char *p1 = (const unsigned char *) s1; - const unsigned char *p2 = (const unsigned char *) s2; - unsigned char c1, c2; - - do - { - c1 = TOLOWER (*p1); - c2 = TOLOWER (*p2); - - if (c1 == '\0') - break; - - ++p1; - ++p2; - } - while (c1 == c2); - - if (UCHAR_MAX <= INT_MAX) - return c1 - c2; - else - /* On machines where 'char' and 'int' are types of the same size, the - difference of two 'unsigned char' values - including the sign bit - - doesn't fit in an 'int'. */ - return (c1 > c2 ? 1 : c1 < c2 ? -1 : 0); - } + /* On machines where 'char' and 'int' are types of the same size, the + difference of two 'unsigned char' values - including the sign bit - + doesn't fit in an 'int'. */ + return (c1 > c2 ? 
1 : c1 < c2 ? -1 : 0); } --- lib/string_.h 5 Feb 2007 01:36:34 -0000 1.10 +++ lib/string_.h 5 Feb 2007 01:52:11 -0000 @@ -115,20 +115,17 @@ /* Compare strings S1 and S2, ignoring case, returning less than, equal to or greater than zero if S1 is lexicographically less than, equal to or greater than S2. - Note: This function may, in multibyte locales, return 0 for strings of - different lengths! - No known system has a strcasecmp() function that works correctly in - multibyte locales. Therefore use our version always, if the - strcase module is available. */ -#if @GNULIB_STRCASE@ -# if @REPLACE_STRCASECMP@ -# define strcasecmp rpl_strcasecmp -extern int strcasecmp (char const *__s1, char const *__s2); -# endif -#elif defined GNULIB_POSIXCHECK + Note: This function does not work in multibyte locales. */ +#if ! @HAVE_STRCASECMP@ +extern int strcasecmp (char const *s1, char const *s2); +#endif +#if defined GNULIB_POSIXCHECK +/* strcasecmp() does not work with multibyte strings: + POSIX says that it operates on "strings", and "string" in POSIX is defined + as a sequence of bytes, not of characters. */ # undef strcasecmp # define strcasecmp(a,b) \ - (GL_LINK_WARNING ("strcasecmp is often incorrectly implemented for multibyte locales - use gnulib module 'strcase' for correct and portable internationalization"), \ + (GL_LINK_WARNING ("strcasecmp cannot work correctly on character strings in multibyte locales - use mbscasecmp if you care about internationalization, or use c_strcasecmp (from gnulib module c-strcase) if you want a locale independent function"), \ strcasecmp (a, b)) #endif @@ -337,6 +334,16 @@ extern char * mbsstr (const char *haystack, const char *needle); #endif +#if @GNULIB_MBSCASECMP@ +/* Compare the character strings S1 and S2, ignoring case, returning less than, + equal to or greater than zero if S1 is lexicographically less than, equal to + or greater than S2. + Note: This function may, in multibyte locales, return 0 for strings of + different lengths! 
+ Unlike strcasecmp(), this function works correctly in multibyte locales. */ +extern int mbscasecmp (const char *s1, const char *s2); +#endif + #ifdef __cplusplus } --- m4/string_h.m4 5 Feb 2007 01:36:34 -0000 1.8 +++ m4/string_h.m4 5 Feb 2007 01:52:11 -0000 @@ -71,4 +71,5 @@ GNULIB_MBSCHR=0; AC_SUBST([GNULIB_MBSCHR]) GNULIB_MBSRCHR=0; AC_SUBST([GNULIB_MBSRCHR]) GNULIB_MBSSTR=0; AC_SUBST([GNULIB_MBSSTR]) + GNULIB_MBSCASECMP=0; AC_SUBST([GNULIB_MBSCASECMP]) ]) --- modules/string 5 Feb 2007 01:36:34 -0000 1.7 +++ modules/string 5 Feb 2007 01:52:11 -0000 @@ -24,6 +24,7 @@ -e 's|@''GNULIB_MBSCHR''@|$(GNULIB_MBSCHR)|g' \ -e 's|@''GNULIB_MBSRCHR''@|$(GNULIB_MBSRCHR)|g' \ -e 's|@''GNULIB_MBSSTR''@|$(GNULIB_MBSSTR)|g' \ + -e 's|@''GNULIB_MBSCASECMP''@|$(GNULIB_MBSCASECMP)|g' \ -e 's|@''GNULIB_MEMMEM''@|$(GNULIB_MEMMEM)|g' \ -e 's|@''GNULIB_MEMPCPY''@|$(GNULIB_MEMPCPY)|g' \ -e 's|@''GNULIB_MEMRCHR''@|$(GNULIB_MEMRCHR)|g' \