This adds a new module, a variant of strchr() that works in multibyte locales.
2007-02-04 Bruno Haible <[EMAIL PROTECTED]> New module mbschr. * modules/mbschr: New file. * lib/mbschr.c: New file. * lib/string_.h (strchr): Add a conditional link warning. (mbschr): New declaration. * m4/mbschr.m4: New file. * m4/string_h.m4 (gl_STRING_MODULE_INDICATOR_DEFAULTS): Initialize GNULIB_MBSCHR. * modules/string (string.h): Also substitute GNULIB_MBSCHR. * MODULES.html.sh (Internationalization functions): Add mbschr. ========================== modules/mbschr ================================= Description: mbschr() function: search a string for a character. Files: lib/mbschr.c m4/mbschr.m4 m4/mbrtowc.m4 Depends-on: mbuiter string configure.ac: gl_FUNC_MBSCHR gl_STRING_MODULE_INDICATOR([mbschr]) Makefile.am: lib_SOURCES += mbschr.c Include: <string.h> License: LGPL Maintainer: Bruno Haible =========================== lib/mbschr.c ================================== /* Searching a string for a character. Copyright (C) 2007 Free Software Foundation, Inc. Written by Bruno Haible <[EMAIL PROTECTED]>, 2007. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #include <config.h> /* Specification. */ #include <string.h> #if HAVE_MBRTOWC # include "mbuiter.h" #endif /* Locate the first single-byte character C in the character string STRING, and return a pointer to it. Return NULL if C is not found in STRING. */ char * mbschr (const char *string, int c) { #if HAVE_MBRTOWC if (MB_CUR_MAX > 1 /* Optimization: We know that ASCII characters < 0x30 don't occur as part of multibyte characters longer than 1 byte. Hence, if c < 0x30, the faster unibyte loop can be used. */ && (unsigned char) c >= 0x30) { mbui_iterator_t iter; for (mbui_init (iter, string);; mbui_advance (iter)) { if (mb_len (mbui_cur (iter)) == 1 && (unsigned char) * mbui_cur_ptr (iter) == (unsigned char) c) break; if (!mbui_avail (iter)) goto notfound; } return (char *) mbui_cur_ptr (iter); notfound: return NULL; } else #endif return strchr (string, c); } =========================== m4/mbschr.m4 ================================== # mbschr.m4 serial 1 dnl Copyright (C) 2007 Free Software Foundation, Inc. dnl This file is free software; the Free Software Foundation dnl gives unlimited permission to copy and/or distribute it, dnl with or without modifications, as long as this notice is preserved. AC_DEFUN([gl_FUNC_MBSCHR], [ gl_PREREQ_MBSCHR ]) # Prerequisites of lib/mbschr.c. AC_DEFUN([gl_PREREQ_MBSCHR], [ AC_REQUIRE([gl_FUNC_MBRTOWC]) : ]) =========================================================================== --- MODULES.html.sh 1 Feb 2007 17:22:52 -0000 1.180 +++ MODULES.html.sh 5 Feb 2007 00:56:22 -0000 @@ -2161,6 +2161,7 @@ func_module iconvme func_module localcharset func_module hard-locale + func_module mbschr func_module mbswidth func_module memcasecmp func_module memcoll --- lib/string_.h 3 Feb 2007 00:01:56 -0000 1.7 +++ lib/string_.h 5 Feb 2007 00:56:22 -0000 @@ -148,6 +148,15 @@ strncasecmp (a, b)) #endif +#if defined GNULIB_POSIXCHECK +/* strchr() does not work with multibyte strings if the locale encoding is + GB18030 and the character to be searched is a digit. */ +# undef strchr +# define strchr(s,c) \ + (GL_LINK_WARNING ("strchr cannot work correctly on character strings in some multibyte locales - use mbschr if you care about internationalization"), \ + strchr (s, c)) +#endif + /* Find the first occurrence of C in S or the final NUL byte. */ #if @GNULIB_STRCHRNUL@ # if ! @HAVE_STRCHRNUL@ @@ -295,6 +304,20 @@ # define strtok_r strtok_r_is_unportable__use_gnulib_module_strtok_r_for_portability #endif + +/* The following functions are not specified by POSIX. They are gnulib + extensions. */ + +#if @GNULIB_MBSCHR@ +/* Locate the first single-byte character C in the character string STRING, + and return a pointer to it. Return NULL if C is not found in STRING. + Unlike strchr(), this function works correctly in multibyte locales with + encodings such as GB18030. */ +# define mbschr rpl_mbschr /* avoid collision with HP-UX function */ +extern char * mbschr (const char *string, int c); +#endif + + #ifdef __cplusplus } #endif --- m4/string_h.m4 1 Feb 2007 05:01:17 -0000 1.5 +++ m4/string_h.m4 5 Feb 2007 00:56:22 -0000 @@ -70,4 +70,5 @@ GNULIB_STRSTR=0; AC_SUBST([GNULIB_STRSTR]) GNULIB_STRCASESTR=0; AC_SUBST([GNULIB_STRCASESTR]) GNULIB_STRTOK_R=0; AC_SUBST([GNULIB_STRTOK_R]) + GNULIB_MBSCHR=0; AC_SUBST([GNULIB_MBSCHR]) ]) --- modules/string 1 Feb 2007 05:01:17 -0000 1.4 +++ modules/string 5 Feb 2007 00:56:22 -0000 @@ -21,6 +21,7 @@ rm -f [EMAIL PROTECTED] $@ { echo '/* DO NOT EDIT! GENERATED AUTOMATICALLY! */' && \ sed -e 's|@''ABSOLUTE_STRING_H''@|$(ABSOLUTE_STRING_H)|g' \ + -e 's|@''GNULIB_MBSCHR''@|$(GNULIB_MBSCHR)|g' \ -e 's|@''GNULIB_MEMMEM''@|$(GNULIB_MEMMEM)|g' \ -e 's|@''GNULIB_MEMPCPY''@|$(GNULIB_MEMPCPY)|g' \ -e 's|@''GNULIB_MEMRCHR''@|$(GNULIB_MEMRCHR)|g' \