The function mbscspn() is a variant of strcspn() that also works with multibyte strings.
2007-02-04 Bruno Haible <[EMAIL PROTECTED]> New module mbscspn. * modules/mbscspn: New file. * lib/mbscspn.c: New file. * lib/string_.h (strcspn): Add a conditional link warning. (mbscspn): New declaration. * m4/mbscspn.m4: New file. * m4/string_h.m4 (gl_STRING_MODULE_INDICATOR_DEFAULTS): Initialize GNULIB_MBSCSPN. * modules/string (string.h): Also substitute GNULIB_MBSCSPN. * MODULES.html.sh (Internationalization functions): Add mbscspn. ========================= modules/mbscspn ============================= Description: mbscspn() function: search a string for any of a set of characters. Files: lib/mbscspn.c m4/mbscspn.m4 m4/mbrtowc.m4 Depends-on: mbuiter string mbschr strcspn configure.ac: gl_FUNC_MBSCSPN gl_STRING_MODULE_INDICATOR([mbscspn]) Makefile.am: lib_SOURCES += mbscspn.c Include: <string.h> License: LGPL Maintainer: Bruno Haible ========================== lib/mbscspn.c ================================= /* Searching a string for a character among a given set of characters. Copyright (C) 1999, 2002, 2006-2007 Free Software Foundation, Inc. Written by Bruno Haible <[EMAIL PROTECTED]>, 2007. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #include <config.h> /* Specification. 
*/ #include <string.h> #if HAVE_MBRTOWC # include "mbuiter.h" #endif /* Find the first occurrence in the character string STRING of any character in the character string ACCEPT. Return the number of bytes from the beginning of the string to this occurrence, or to the end of the string if none exists. */ size_t mbscspn (const char *string, const char *accept) { /* Optimize two cases. */ if (accept[0] == '\0') return strlen (string); if (accept[1] == '\0') { const char *ptr = mbschr (string, accept[0]); return (ptr != NULL ? ptr - string : strlen (string)); } /* General case. */ #if HAVE_MBRTOWC if (MB_CUR_MAX > 1) { mbui_iterator_t iter; for (mbui_init (iter, string); mbui_avail (iter); mbui_advance (iter)) { if (mb_len (mbui_cur (iter)) == 1) { if (mbschr (accept, (unsigned char) * mbui_cur_ptr (iter))) return mbui_cur_ptr (iter) - string; } else { mbui_iterator_t aiter; for (mbui_init (aiter, accept); mbui_avail (aiter); mbui_advance (aiter)) if (mb_equal (mbui_cur (aiter), mbui_cur (iter))) return mbui_cur_ptr (iter) - string; } } return strlen (string); } else #endif return strcspn (string, accept); } ========================== m4/mbscspn.m4 ================================= # mbscspn.m4 serial 1 dnl Copyright (C) 2007 Free Software Foundation, Inc. dnl This file is free software; the Free Software Foundation dnl gives unlimited permission to copy and/or distribute it, dnl with or without modifications, as long as this notice is preserved. AC_DEFUN([gl_FUNC_MBSCSPN], [ gl_PREREQ_MBSCSPN ]) # Prerequisites of lib/mbscspn.c. 
AC_DEFUN([gl_PREREQ_MBSCSPN], [ AC_REQUIRE([gl_FUNC_MBRTOWC]) : ]) ========================================================================== --- MODULES.html.sh 5 Feb 2007 02:42:27 -0000 1.185 +++ MODULES.html.sh 5 Feb 2007 03:02:46 -0000 @@ -2165,6 +2165,7 @@ func_module mbsstr func_module mbscasecmp func_module mbscasestr + func_module mbscspn func_module mbswidth func_module memcasecmp func_module memcoll --- lib/string_.h 5 Feb 2007 02:42:27 -0000 1.13 +++ lib/string_.h 5 Feb 2007 03:02:46 -0000 @@ -201,6 +201,17 @@ # define strnlen strnlen_is_unportable__use_gnulib_module_strnlen_for_portability #endif +#if defined GNULIB_POSIXCHECK +/* strcspn() assumes the second argument is a list of single-byte characters. + Even in this simple case, it does not work with multibyte strings if the + locale encoding is GB18030 and one of the characters to be searched is a + digit. */ +# undef strcspn +# define strcspn(s,a) \ + (GL_LINK_WARNING ("strcspn cannot work correctly on character strings in multibyte locales - use mbscspn if you care about internationalization"), \ + strcspn (s, a)) +#endif + /* Find the first occurrence in S of any character in ACCEPT. */ #if @GNULIB_STRPBRK@ # if ! @HAVE_STRPBRK@ @@ -352,6 +363,15 @@ extern char * mbscasestr (const char *haystack, const char *needle); #endif +#if @GNULIB_MBSCSPN@ +/* Find the first occurrence in the character string STRING of any character + in the character string ACCEPT. Return the number of bytes from the + beginning of the string to this occurrence, or to the end of the string + if none exists. + Unlike strcspn(), this function works correctly in multibyte locales. 
*/ +extern size_t mbscspn (const char *string, const char *accept); +#endif + #ifdef __cplusplus } --- m4/string_h.m4 5 Feb 2007 02:42:27 -0000 1.12 +++ m4/string_h.m4 5 Feb 2007 03:02:47 -0000 @@ -72,4 +72,5 @@ GNULIB_MBSSTR=0; AC_SUBST([GNULIB_MBSSTR]) GNULIB_MBSCASECMP=0; AC_SUBST([GNULIB_MBSCASECMP]) GNULIB_MBSCASESTR=0; AC_SUBST([GNULIB_MBSCASESTR]) + GNULIB_MBSCSPN=0; AC_SUBST([GNULIB_MBSCSPN]) ]) --- modules/string 5 Feb 2007 02:42:27 -0000 1.11 +++ modules/string 5 Feb 2007 03:02:47 -0000 @@ -26,6 +26,7 @@ -e 's|@''GNULIB_MBSSTR''@|$(GNULIB_MBSSTR)|g' \ -e 's|@''GNULIB_MBSCASECMP''@|$(GNULIB_MBSCASECMP)|g' \ -e 's|@''GNULIB_MBSCASESTR''@|$(GNULIB_MBSCASESTR)|g' \ + -e 's|@''GNULIB_MBSCSPN''@|$(GNULIB_MBSCSPN)|g' \ -e 's|@''GNULIB_MEMMEM''@|$(GNULIB_MEMMEM)|g' \ -e 's|@''GNULIB_MEMPCPY''@|$(GNULIB_MEMPCPY)|g' \ -e 's|@''GNULIB_MEMRCHR''@|$(GNULIB_MEMRCHR)|g' \