This introduces a new module mbscasestr. mbscasestr() is like glibc's strcasestr(), except it works on character strings (i.e. on multibyte strings).
The module strcasestr now only provides a non-internationalized equivalent to glibc's non-internationalized function!! 2007-02-04 Bruno Haible <[EMAIL PROTECTED]> New module mbscasestr, reduced goal of strcasestr. * modules/mbscasestr: New file. * lib/mbscasestr.c: New file, copied from lib/strcasestr.c. (mbscasestr): Renamed from strcasestr. * lib/strcasestr.c: Don't include mbuiter.h. (strcasestr): Remove support for multibyte locales. * lib/string_.h (strcasestr): Don't rename. Declare only if missing. Change the conditional link warning. (mbscasestr): New declaration. * m4/mbscasestr.m4: New file. * m4/strcasestr.m4 (gl_FUNC_STRCASESTR): Enable the replacement only if the system does not have strcasestr. Set HAVE_STRCASESTR instead of REPLACE_STRCASESTR. * m4/string_h.m4 (gl_HEADER_STRING_H_DEFAULTS): Initialize HAVE_STRCASESTR instead of REPLACE_STRCASESTR. (gl_STRING_MODULE_INDICATOR_DEFAULTS): Initialize GNULIB_MBSCASESTR. * modules/string (string.h): Also substitute GNULIB_MBSCASESTR. Substitute HAVE_STRCASESTR instead of REPLACE_STRCASESTR. * MODULES.html.sh (Internationalization functions): Add mbscasestr. =========================== modules/mbscasestr ============================= Description: mbscasestr() function: case-insensitive search for a substring in a string. Files: lib/mbscasestr.c m4/mbscasestr.m4 m4/mbrtowc.m4 Depends-on: mbuiter string configure.ac: gl_FUNC_MBSCASESTR gl_STRING_MODULE_INDICATOR([mbscasestr]) Makefile.am: lib_SOURCES += mbscasestr.c Include: <string.h> License: LGPL Maintainer: Bruno Haible =========================== m4/mbscasestr.m4 =============================== # mbscasestr.m4 serial 1 dnl Copyright (C) 2007 Free Software Foundation, Inc. dnl This file is free software; the Free Software Foundation dnl gives unlimited permission to copy and/or distribute it, dnl with or without modifications, as long as this notice is preserved. 
AC_DEFUN([gl_FUNC_MBSCASESTR], [ gl_PREREQ_MBSCASESTR ]) # Prerequisites of lib/mbscasestr.c. AC_DEFUN([gl_PREREQ_MBSCASESTR], [ AC_REQUIRE([gl_FUNC_MBRTOWC]) : ]) ============================================================================ *** lib/mbscasestr.c 5 Feb 2007 02:20:09 -0000 1.1 --- lib/mbscasestr.c 5 Feb 2007 02:35:45 -0000 *************** *** 30,41 **** #define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch)) ! /* Find the first occurrence of NEEDLE in HAYSTACK, using case-insensitive ! comparison. Note: This function may, in multibyte locales, return success even if strlen (haystack) < strlen (needle) ! */ char * ! strcasestr (const char *haystack, const char *needle) { /* Be careful not to look at the entire extent of haystack or needle until needed. This is useful because of these two cases: --- 30,41 ---- #define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch)) ! /* Find the first occurrence of the character string NEEDLE in the character ! string HAYSTACK, using case-insensitive comparison. Note: This function may, in multibyte locales, return success even if strlen (haystack) < strlen (needle) ! */ char * ! mbscasestr (const char *haystack, const char *needle) { /* Be careful not to look at the entire extent of haystack or needle until needed. This is useful because of these two cases: *** lib/strcasestr.c 27 Jan 2007 13:17:16 -0000 1.5 --- lib/strcasestr.c 5 Feb 2007 02:35:46 -0000 *************** *** 24,33 **** #include <ctype.h> #include <stddef.h> /* for NULL, in case a nonstandard string.h lacks it */ - #if HAVE_MBRTOWC - # include "mbuiter.h" - #endif - #define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch)) /* Find the first occurrence of NEEDLE in HAYSTACK, using case-insensitive --- 24,29 ---- *************** *** 37,145 **** char * strcasestr (const char *haystack, const char *needle) { ! /* Be careful not to look at the entire extent of haystack or needle ! until needed. This is useful because of these two cases: ! 
- haystack may be very long, and a match of needle found early, ! - needle may be very long, and not even a short initial segment of ! needle may be found in haystack. */ ! #if HAVE_MBRTOWC ! if (MB_CUR_MAX > 1) { ! mbui_iterator_t iter_needle; ! mbui_init (iter_needle, needle); ! if (mbui_avail (iter_needle)) { ! mbchar_t b; ! mbui_iterator_t iter_haystack; ! ! mb_copy (&b, &mbui_cur (iter_needle)); ! if (b.wc_valid) ! b.wc = towlower (b.wc); ! ! mbui_init (iter_haystack, haystack); ! for (;; mbui_advance (iter_haystack)) { ! mbchar_t c; ! if (!mbui_avail (iter_haystack)) ! /* No match. */ ! return NULL; ! ! mb_copy (&c, &mbui_cur (iter_haystack)); ! if (c.wc_valid) ! c.wc = towlower (c.wc); ! if (mb_equal (c, b)) ! /* The first character matches. */ { ! mbui_iterator_t rhaystack; ! mbui_iterator_t rneedle; ! ! memcpy (&rhaystack, &iter_haystack, sizeof (mbui_iterator_t)); ! mbui_advance (rhaystack); ! ! mbui_init (rneedle, needle); ! if (!mbui_avail (rneedle)) ! abort (); ! mbui_advance (rneedle); ! ! for (;; mbui_advance (rhaystack), mbui_advance (rneedle)) ! { ! if (!mbui_avail (rneedle)) ! /* Found a match. */ ! return (char *) mbui_cur_ptr (iter_haystack); ! if (!mbui_avail (rhaystack)) ! /* No match. */ ! return NULL; ! if (!mb_caseequal (mbui_cur (rhaystack), ! mbui_cur (rneedle))) ! /* Nothing in this round. */ ! break; ! } } } } - else - return (char *) haystack; } else ! #endif ! { ! if (*needle != '\0') ! { ! /* Speed up the following searches of needle by caching its first ! character. */ ! unsigned char b = TOLOWER ((unsigned char) *needle); ! ! needle++; ! for (;; haystack++) ! { ! if (*haystack == '\0') ! /* No match. */ ! return NULL; ! if (TOLOWER ((unsigned char) *haystack) == b) ! /* The first character matches. */ ! { ! const char *rhaystack = haystack + 1; ! const char *rneedle = needle; ! ! for (;; rhaystack++, rneedle++) ! { ! if (*rneedle == '\0') ! /* Found a match. */ ! return (char *) haystack; ! if (*rhaystack == '\0') ! /* No match. 
*/ ! return NULL; ! if (TOLOWER ((unsigned char) *rhaystack) ! != TOLOWER ((unsigned char) *rneedle)) ! /* Nothing in this round. */ ! break; ! } ! } ! } ! } ! else ! return (char *) haystack; ! } } --- 33,72 ---- char * strcasestr (const char *haystack, const char *needle) { ! if (*needle != '\0') { ! /* Speed up the following searches of needle by caching its first ! character. */ ! unsigned char b = TOLOWER ((unsigned char) *needle); ! needle++; ! for (;; haystack++) { ! if (*haystack == '\0') ! /* No match. */ ! return NULL; ! if (TOLOWER ((unsigned char) *haystack) == b) ! /* The first character matches. */ { ! const char *rhaystack = haystack + 1; ! const char *rneedle = needle; ! for (;; rhaystack++, rneedle++) { ! if (*rneedle == '\0') ! /* Found a match. */ ! return (char *) haystack; ! if (*rhaystack == '\0') ! /* No match. */ ! return NULL; ! if (TOLOWER ((unsigned char) *rhaystack) ! != TOLOWER ((unsigned char) *rneedle)) ! /* Nothing in this round. */ ! break; } } } } else ! return (char *) haystack; } *** lib/string_.h 5 Feb 2007 02:15:46 -0000 1.12 --- lib/string_.h 5 Feb 2007 02:35:46 -0000 *************** *** 257,275 **** #endif /* Find the first occurrence of NEEDLE in HAYSTACK, using case-insensitive ! comparison. ! Note: This function may, in multibyte locales, return success even if ! strlen (haystack) < strlen (needle) ! */ ! #if @GNULIB_STRCASESTR@ ! # if @REPLACE_STRCASESTR@ ! # undef strcasestr ! # define strcasestr rpl_strcasestr extern char *strcasestr (const char *haystack, const char *needle); ! # endif ! #elif defined GNULIB_POSIXCHECK # undef strcasestr # define strcasestr(a,b) \ ! (GL_LINK_WARNING ("strcasestr is often incorrectly implemented for multibyte locales - use gnulib module 'strcasestr' for correct and portable internationalization"), \ strcasestr (a, b)) #endif --- 257,273 ---- #endif /* Find the first occurrence of NEEDLE in HAYSTACK, using case-insensitive ! comparison. */ ! #if ! 
@HAVE_STRCASESTR@ extern char *strcasestr (const char *haystack, const char *needle); ! #endif ! #if defined GNULIB_POSIXCHECK ! /* strcasestr() does not work with multibyte strings: ! It is a glibc extension, and glibc implements it only for unibyte ! locales. */ # undef strcasestr # define strcasestr(a,b) \ ! (GL_LINK_WARNING ("strcasestr does work correctly on character strings in multibyte locales - use mbscasestr if you care about internationalization, or use c-strcasestr if you want a locale independent function"), \ strcasestr (a, b)) #endif *************** *** 345,350 **** --- 343,357 ---- extern int mbscasecmp (const char *s1, const char *s2); #endif + #if @GNULIB_MBSCASESTR@ + /* Find the first occurrence of the character string NEEDLE in the character + string HAYSTACK, using case-insensitive comparison. + Note: This function may, in multibyte locales, return success even if + strlen (haystack) < strlen (needle) ! + Unlike strcasestr(), this function works correctly in multibyte locales. */ + extern char * mbscasestr (const char *haystack, const char *needle); + #endif + #ifdef __cplusplus } *** m4/strcasestr.m4 27 Jan 2007 14:43:17 -0000 1.4 --- m4/strcasestr.m4 5 Feb 2007 02:35:46 -0000 *************** *** 1,4 **** ! # strcasestr.m4 serial 4 dnl Copyright (C) 2005, 2007 Free Software Foundation, Inc. dnl This file is free software; the Free Software Foundation dnl gives unlimited permission to copy and/or distribute it, --- 1,4 ---- ! # strcasestr.m4 serial 5 dnl Copyright (C) 2005, 2007 Free Software Foundation, Inc. dnl This file is free software; the Free Software Foundation dnl gives unlimited permission to copy and/or distribute it, *************** *** 7,17 **** AC_DEFUN([gl_FUNC_STRCASESTR], [ AC_REQUIRE([gl_HEADER_STRING_H_DEFAULTS]) ! dnl No known system has a strcasestr() function that works correctly in ! dnl multibyte locales. Therefore we use our version always. ! AC_LIBOBJ(strcasestr) ! REPLACE_STRCASESTR=1 ! 
gl_PREREQ_STRCASESTR ]) # Prerequisites of lib/strcasestr.c. --- 7,17 ---- AC_DEFUN([gl_FUNC_STRCASESTR], [ AC_REQUIRE([gl_HEADER_STRING_H_DEFAULTS]) ! AC_REPLACE_FUNCS(strcasestr) ! if test $ac_cv_func_strcasestr = no; then ! HAVE_STRCASESTR=0 ! gl_PREREQ_STRCASESTR ! fi ]) # Prerequisites of lib/strcasestr.c. *** m4/string_h.m4 5 Feb 2007 02:27:35 -0000 1.11 --- m4/string_h.m4 5 Feb 2007 02:35:46 -0000 *************** *** 41,48 **** HAVE_DECL_STRNLEN=1; AC_SUBST([HAVE_DECL_STRNLEN]) HAVE_STRPBRK=1; AC_SUBST([HAVE_STRPBRK]) HAVE_STRSEP=1; AC_SUBST([HAVE_STRSEP]) HAVE_DECL_STRTOK_R=1; AC_SUBST([HAVE_DECL_STRTOK_R]) - REPLACE_STRCASESTR=0; AC_SUBST([REPLACE_STRCASESTR]) ]) AC_DEFUN([gl_STRING_MODULE_INDICATOR], --- 41,48 ---- HAVE_DECL_STRNLEN=1; AC_SUBST([HAVE_DECL_STRNLEN]) HAVE_STRPBRK=1; AC_SUBST([HAVE_STRPBRK]) HAVE_STRSEP=1; AC_SUBST([HAVE_STRSEP]) + HAVE_STRCASESTR=1; AC_SUBST([HAVE_STRCASESTR]) HAVE_DECL_STRTOK_R=1; AC_SUBST([HAVE_DECL_STRTOK_R]) ]) AC_DEFUN([gl_STRING_MODULE_INDICATOR], *************** *** 71,74 **** --- 71,75 ---- GNULIB_MBSRCHR=0; AC_SUBST([GNULIB_MBSRCHR]) GNULIB_MBSSTR=0; AC_SUBST([GNULIB_MBSSTR]) GNULIB_MBSCASECMP=0; AC_SUBST([GNULIB_MBSCASECMP]) + GNULIB_MBSCASESTR=0; AC_SUBST([GNULIB_MBSCASESTR]) ]) *** modules/string 5 Feb 2007 02:15:46 -0000 1.10 --- modules/string 5 Feb 2007 02:35:46 -0000 *************** *** 25,30 **** --- 25,31 ---- -e 's|@''GNULIB_MBSRCHR''@|$(GNULIB_MBSRCHR)|g' \ -e 's|@''GNULIB_MBSSTR''@|$(GNULIB_MBSSTR)|g' \ -e 's|@''GNULIB_MBSCASECMP''@|$(GNULIB_MBSCASECMP)|g' \ + -e 's|@''GNULIB_MBSCASESTR''@|$(GNULIB_MBSCASESTR)|g' \ -e 's|@''GNULIB_MEMMEM''@|$(GNULIB_MEMMEM)|g' \ -e 's|@''GNULIB_MEMPCPY''@|$(GNULIB_MEMPCPY)|g' \ -e 's|@''GNULIB_MEMRCHR''@|$(GNULIB_MEMRCHR)|g' \ *************** *** 52,59 **** -e 's|@''HAVE_DECL_STRNLEN''@|$(HAVE_DECL_STRNLEN)|g' \ -e 's|@''HAVE_STRPBRK''@|$(HAVE_STRPBRK)|g' \ -e 's|@''HAVE_STRSEP''@|$(HAVE_STRSEP)|g' \ -e 's|@''HAVE_DECL_STRTOK_R''@|$(HAVE_DECL_STRTOK_R)|g' \ - -e 
's|@''REPLACE_STRCASESTR''@|$(REPLACE_STRCASESTR)|g' \ < $(srcdir)/string_.h; \ } > [EMAIL PROTECTED] mv [EMAIL PROTECTED] $@ --- 53,60 ---- -e 's|@''HAVE_DECL_STRNLEN''@|$(HAVE_DECL_STRNLEN)|g' \ -e 's|@''HAVE_STRPBRK''@|$(HAVE_STRPBRK)|g' \ -e 's|@''HAVE_STRSEP''@|$(HAVE_STRSEP)|g' \ + -e 's|@''HAVE_STRCASESTR''@|$(HAVE_STRCASESTR)|g' \ -e 's|@''HAVE_DECL_STRTOK_R''@|$(HAVE_DECL_STRTOK_R)|g' \ < $(srcdir)/string_.h; \ } > [EMAIL PROTECTED] mv [EMAIL PROTECTED] $@ *** MODULES.html.sh 5 Feb 2007 01:57:07 -0000 1.184 --- MODULES.html.sh 5 Feb 2007 02:39:03 -0000 *************** *** 2164,2169 **** --- 2164,2170 ---- func_module mbsrchr func_module mbsstr func_module mbscasecmp + func_module mbscasestr func_module mbswidth func_module memcasecmp func_module memcoll