The function mbstok_r is like strtok_r, except that it also works on multibyte strings.
2007-02-04 Bruno Haible <[EMAIL PROTECTED]> New module mbstok_r. * modules/mbstok_r: New file. * lib/mbstok_r.c: New file. * lib/string_.h (strtok_r): Change argument names to match the comments. Add a conditional link warning. (mbstok_r): New declaration. * m4/mbstok_r.m4: New file. * m4/string_h.m4 (gl_STRING_MODULE_INDICATOR_DEFAULTS): Initialize GNULIB_MBSTOK_R. * modules/string (string.h): Also substitute GNULIB_MBSTOK_R. * MODULES.html.sh (Internationalization functions): Add mbstok_r. ========================== modules/mbstok_r =================================== Description: mbstok_r() function: split string into tokens, thread safe. Files: lib/mbstok_r.c m4/mbstok_r.m4 m4/mbrtowc.m4 Depends-on: mbuiter string mbsspn mbspbrk strtok_r configure.ac: gl_FUNC_MBSTOK_R gl_STRING_MODULE_INDICATOR([mbstok_r]) Makefile.am: lib_SOURCES += mbstok_r.c Include: <string.h> License: LGPL Maintainer: Bruno Haible =========================== lib/mbstok_r.c ==================================== /* Tokenizing a string. Copyright (C) 1999, 2002, 2006-2007 Free Software Foundation, Inc. Written by Bruno Haible <[EMAIL PROTECTED]>, 2007. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #include <config.h> /* Specification. 
*/ #include <string.h> #if HAVE_MBRTOWC # include "mbuiter.h" #endif char * mbstok_r (char *string, const char *delim, char **save_ptr) { #if HAVE_MBRTOWC if (MB_CUR_MAX > 1) { if (string == NULL) { string = *save_ptr; if (string == NULL) return NULL; /* reminder that end of token sequence has been reached */ } /* Skip leading delimiters. */ string += mbsspn (string, delim); /* Found a token? */ if (*string == '\0') { *save_ptr = NULL; return NULL; } /* Move past the token. */ { char *token_end = mbspbrk (string, delim); if (token_end != NULL) { /* NUL-terminate the token. */ *token_end = '\0'; *save_ptr = token_end + 1; } else *save_ptr = NULL; } return string; } else #endif return strtok_r (string, delim, save_ptr); } ============================ m4/mbstok_r.m4 =================================== # mbstok_r.m4 serial 1 dnl Copyright (C) 2007 Free Software Foundation, Inc. dnl This file is free software; the Free Software Foundation dnl gives unlimited permission to copy and/or distribute it, dnl with or without modifications, as long as this notice is preserved. AC_DEFUN([gl_FUNC_MBSTOK_R], [ gl_PREREQ_MBSTOK_R ]) # Prerequisites of lib/mbstok_r.c. AC_DEFUN([gl_PREREQ_MBSTOK_R], [ AC_REQUIRE([gl_FUNC_MBRTOWC]) : ]) =============================================================================== --- MODULES.html.sh 5 Feb 2007 03:27:17 -0000 1.188 +++ MODULES.html.sh 5 Feb 2007 03:30:59 -0000 @@ -2168,6 +2168,7 @@ func_module mbscspn func_module mbspbrk func_module mbsspn + func_module mbstok_r func_module mbswidth func_module memcasecmp func_module memcoll --- lib/string_.h 5 Feb 2007 03:27:17 -0000 1.16 +++ lib/string_.h 5 Feb 2007 03:30:59 -0000 @@ -325,8 +325,14 @@ See also strsep(). */ #if @GNULIB_STRTOK_R@ # if ! 
@HAVE_DECL_STRTOK_R@ -extern char *strtok_r (char *restrict __s, char const *restrict __sep, - char **restrict __lasts); +extern char *strtok_r (char *restrict s, char const *restrict delim, + char **restrict save_ptr); +# endif +# if defined GNULIB_POSIXCHECK +# undef strtok_r +# define strtok_r(s,d,p) \ + (GL_LINK_WARNING ("strtok_r cannot work correctly on character strings in multibyte locales - use mbstok_r if you care about internationalization"), \ + strtok_r (s, d, p)) # endif #elif defined GNULIB_POSIXCHECK # undef strtok_r @@ -409,6 +415,24 @@ extern size_t mbsspn (const char *string, const char *reject); #endif +#if @GNULIB_MBSTOK_R@ +/* Parse the character string STRING into tokens separated by characters in + the character string DELIM. + If STRING is NULL, the saved pointer in SAVE_PTR is used as + the next starting point. For example: + char s[] = "-abc-=-def"; + char *sp; + x = mbstok_r(s, "-", &sp); // x = "abc", sp = "=-def" + x = mbstok_r(NULL, "-=", &sp); // x = "def", sp = NULL + x = mbstok_r(NULL, "=", &sp); // x = NULL + // s = "-abc\0=-def" + + Caveat: It modifies the original string. + Caveat: This function cannot be used on constant strings. + Caveat: The identity of the delimiting character is lost. 
*/ +extern char * mbstok_r (char *string, const char *delim, char **save_ptr); +#endif + #ifdef __cplusplus } --- m4/string_h.m4 5 Feb 2007 03:27:17 -0000 1.15 +++ m4/string_h.m4 5 Feb 2007 03:30:59 -0000 @@ -75,4 +75,5 @@ GNULIB_MBSCSPN=0; AC_SUBST([GNULIB_MBSCSPN]) GNULIB_MBSPBRK=0; AC_SUBST([GNULIB_MBSPBRK]) GNULIB_MBSSPN=0; AC_SUBST([GNULIB_MBSSPN]) + GNULIB_MBSTOK_R=0; AC_SUBST([GNULIB_MBSTOK_R]) ]) --- modules/string 5 Feb 2007 03:27:17 -0000 1.14 +++ modules/string 5 Feb 2007 03:30:59 -0000 @@ -29,6 +29,7 @@ -e 's|@''GNULIB_MBSCSPN''@|$(GNULIB_MBSCSPN)|g' \ -e 's|@''GNULIB_MBSPBRK''@|$(GNULIB_MBSPBRK)|g' \ -e 's|@''GNULIB_MBSSPN''@|$(GNULIB_MBSSPN)|g' \ + -e 's|@''GNULIB_MBSTOK_R''@|$(GNULIB_MBSTOK_R)|g' \ -e 's|@''GNULIB_MEMMEM''@|$(GNULIB_MEMMEM)|g' \ -e 's|@''GNULIB_MEMPCPY''@|$(GNULIB_MEMPCPY)|g' \ -e 's|@''GNULIB_MEMRCHR''@|$(GNULIB_MEMRCHR)|g' \