The function mbspbrk() is like strpbrk(), except that it also works in multibyte locales.
2007-02-04 Bruno Haible <[EMAIL PROTECTED]> New module mbspbrk. * modules/mbspbrk: New file. * lib/mbspbrk.c: New file. * lib/string_.h (strpbrk): Add a conditional link warning. (mbspbrk): New declaration. * m4/mbspbrk.m4: New file. * m4/string_h.m4 (gl_STRING_MODULE_INDICATOR_DEFAULTS): Initialize GNULIB_MBSPBRK. * modules/string (string.h): Also substitute GNULIB_MBSPBRK. * MODULES.html.sh (Internationalization functions): Add mbspbrk. ============================== modules/mbspbrk ============================== Description: mbspbrk() function: search a string for any of a set of characters. Files: lib/mbspbrk.c m4/mbspbrk.m4 m4/mbrtowc.m4 Depends-on: mbuiter string mbschr strpbrk configure.ac: gl_FUNC_MBSPBRK gl_STRING_MODULE_INDICATOR([mbspbrk]) Makefile.am: lib_SOURCES += mbspbrk.c Include: <string.h> License: LGPL Maintainer: Bruno Haible =============================== lib/mbspbrk.c =============================== /* Searching a string for a character among a given set of characters. Copyright (C) 1999, 2002, 2006-2007 Free Software Foundation, Inc. Written by Bruno Haible <[EMAIL PROTECTED]>, 2007. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #include <config.h> /* Specification. 
*/ #include <string.h> #if HAVE_MBRTOWC # include "mbuiter.h" #endif /* Find the first occurrence in the character string STRING of any character in the character string ACCEPT. Return the pointer to it, or NULL if none exists. The result is a non-const char * even though STRING is declared const. When HAVE_MBRTOWC is set and MB_CUR_MAX > 1, STRING is walked with an mbuiter iterator; otherwise the search is delegated to strpbrk(). */ char * mbspbrk (const char *string, const char *accept) { /* Optimize two cases: an empty ACCEPT yields NULL immediately, and an ACCEPT whose second byte is the terminator is handed to mbschr() together with its single byte. */ if (accept[0] == '\0') return NULL; if (accept[1] == '\0') return mbschr (string, accept[0]); /* General case: ACCEPT is at least two bytes long. */ #if HAVE_MBRTOWC if (MB_CUR_MAX > 1) { mbui_iterator_t iter; /* Visit STRING one iterator step at a time. */ for (mbui_init (iter, string); mbui_avail (iter); mbui_advance (iter)) { /* Current element of STRING has length 1: look that byte up in ACCEPT via mbschr() (presumably a multibyte-aware strchr; its definition is not shown here). */ if (mb_len (mbui_cur (iter)) == 1) { if (mbschr (accept, (unsigned char) * mbui_cur_ptr (iter))) return (char *) mbui_cur_ptr (iter); } else { /* Otherwise compare the current element of STRING against every element of ACCEPT with mb_equal(). */ mbui_iterator_t aiter; for (mbui_init (aiter, accept); mbui_avail (aiter); mbui_advance (aiter)) if (mb_equal (mbui_cur (aiter), mbui_cur (iter))) return (char *) mbui_cur_ptr (iter); } } /* The loop over STRING ended without a match. */ return NULL; } else #endif /* Reached when the multibyte path is not compiled in or MB_CUR_MAX <= 1. */ return strpbrk (string, accept); } =============================== m4/mbspbrk.m4 =============================== # mbspbrk.m4 serial 1 dnl Copyright (C) 2007 Free Software Foundation, Inc. dnl This file is free software; the Free Software Foundation dnl gives unlimited permission to copy and/or distribute it, dnl with or without modifications, as long as this notice is preserved. AC_DEFUN([gl_FUNC_MBSPBRK], [ gl_PREREQ_MBSPBRK ]) # Prerequisites of lib/mbspbrk.c. AC_DEFUN([gl_PREREQ_MBSPBRK], [ AC_REQUIRE([gl_FUNC_MBRTOWC]) : ]) ============================================================================= --- MODULES.html.sh 5 Feb 2007 03:06:40 -0000 1.186 +++ MODULES.html.sh 5 Feb 2007 03:11:33 -0000 @@ -2166,6 +2166,7 @@ func_module mbscasecmp func_module mbscasestr func_module mbscspn + func_module mbspbrk func_module mbswidth func_module memcasecmp func_module memcoll --- lib/string_.h 5 Feb 2007 03:06:40 -0000 1.14 +++ lib/string_.h 5 Feb 2007 03:11:33 -0000 @@ -217,6 +217,16 @@ # if ! 
@HAVE_STRPBRK@ extern char *strpbrk (char const *__s, char const *__accept); # endif +# if defined GNULIB_POSIXCHECK +/* strpbrk() assumes the second argument is a list of single-byte characters. + Even in this simple case, it does not work with multibyte strings if the + locale encoding is GB18030 and one of the characters to be searched is a + digit. */ +# undef strpbrk +# define strpbrk(s,a) \ + (GL_LINK_WARNING ("strpbrk cannot work correctly on character strings in multibyte locales - use mbspbrk if you care about internationalization"), \ + strpbrk (s, a)) +# endif #elif defined GNULIB_POSIXCHECK # undef strpbrk # define strpbrk strpbrk_is_unportable__use_gnulib_module_strpbrk_for_portability @@ -372,6 +382,15 @@ extern size_t mbscspn (const char *string, const char *accept); #endif +#if @GNULIB_MBSPBRK@ +/* Find the first occurrence in the character string STRING of any character + in the character string ACCEPT. Return the pointer to it, or NULL if none + exists. + Unlike strpbrk(), this function works correctly in multibyte locales. 
*/ +# define mbspbrk rpl_mbspbrk /* avoid collision with HP-UX function */ +extern char * mbspbrk (const char *string, const char *accept); +#endif + #ifdef __cplusplus } --- m4/string_h.m4 5 Feb 2007 03:06:40 -0000 1.13 +++ m4/string_h.m4 5 Feb 2007 03:11:33 -0000 @@ -73,4 +73,5 @@ GNULIB_MBSCASECMP=0; AC_SUBST([GNULIB_MBSCASECMP]) GNULIB_MBSCASESTR=0; AC_SUBST([GNULIB_MBSCASESTR]) GNULIB_MBSCSPN=0; AC_SUBST([GNULIB_MBSCSPN]) + GNULIB_MBSPBRK=0; AC_SUBST([GNULIB_MBSPBRK]) ]) --- modules/string 5 Feb 2007 03:06:40 -0000 1.12 +++ modules/string 5 Feb 2007 03:11:34 -0000 @@ -27,6 +27,7 @@ -e 's|@''GNULIB_MBSCASECMP''@|$(GNULIB_MBSCASECMP)|g' \ -e 's|@''GNULIB_MBSCASESTR''@|$(GNULIB_MBSCASESTR)|g' \ -e 's|@''GNULIB_MBSCSPN''@|$(GNULIB_MBSCSPN)|g' \ + -e 's|@''GNULIB_MBSPBRK''@|$(GNULIB_MBSPBRK)|g' \ -e 's|@''GNULIB_MEMMEM''@|$(GNULIB_MEMMEM)|g' \ -e 's|@''GNULIB_MEMPCPY''@|$(GNULIB_MEMPCPY)|g' \ -e 's|@''GNULIB_MEMRCHR''@|$(GNULIB_MEMRCHR)|g' \