This patch migrates the 'mbmemcasecoll' module from wchar_t to char32_t, in order to overcome the limitations of wchar_t.
2023-07-02 Bruno Haible <br...@clisp.org> mbmemcasecoll: Overcome wchar_t limitations. * lib/mbmemcasecoll.c: Include <uchar.h> instead of <wctype.h>. (apply_c32tolower): Renamed from apply_towlower. Use mbrtoc32 instead of mbrtowc. Use c32tolower instead of towlower. Use c32rtomb instead of wcrtomb. * modules/mbmemcasecoll (Depends-on): Remove mbrtowc, wcrtomb. Add uchar, mbrtoc32, c32rtomb, c32tolower. (Link): Add $(LIBUNISTRING) $(LIBC32CONV). * modules/mbmemcasecoll-tests (Makefile.am): Link test-mbmemcasecoll with $(LIBUNISTRING) $(LIBC32CONV). diff --git a/lib/mbmemcasecoll.c b/lib/mbmemcasecoll.c index 6f4acb7e77..de7390f6d1 100644 --- a/lib/mbmemcasecoll.c +++ b/lib/mbmemcasecoll.c @@ -27,11 +27,11 @@ /* Get tolower(). */ #include <ctype.h> -/* Get mbstate_t, mbrtowc(), wcrtomb(). */ +/* Get mbstate_t. */ #include <wchar.h> -/* Get towlower(). */ -#include <wctype.h> +/* Get char32_t, mbrtoc32(), c32rtomb(), c32tolower(). */ +#include <uchar.h> #include "malloca.h" #include "memcmp2.h" @@ -39,11 +39,11 @@ #define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch)) -/* Apply towlower() to the multibyte character sequence in INBUF, storing the +/* Apply c32tolower() to the multibyte character sequence in INBUF, storing the result as a multibyte character sequence in OUTBUF. 
*/ static size_t -apply_towlower (const char *inbuf, size_t inbufsize, - char *outbuf, size_t outbufsize) +apply_c32tolower (const char *inbuf, size_t inbufsize, + char *outbuf, size_t outbufsize) { char *outbuf_orig = outbuf; size_t remaining; @@ -51,12 +51,12 @@ apply_towlower (const char *inbuf, size_t inbufsize, remaining = inbufsize; while (remaining > 0) { - wchar_t wc1; + char32_t wc1; size_t n1; mbstate_t state; memset (&state, '\0', sizeof (mbstate_t)); - n1 = mbrtowc (&wc1, inbuf, remaining, &state); + n1 = mbrtoc32 (&wc1, inbuf, remaining, &state); if (n1 == (size_t)(-2)) break; if (n1 != (size_t)(-1)) @@ -65,14 +65,16 @@ apply_towlower (const char *inbuf, size_t inbufsize, if (n1 == 0) /* NUL character? */ n1 = 1; + else if (n1 == (size_t)(-3)) + n1 = 0; - wc2 = towlower (wc1); + wc2 = c32tolower (wc1); if (wc2 != wc1) { size_t n2; memset (&state, '\0', sizeof (mbstate_t)); - n2 = wcrtomb (outbuf, wc2, &state); + n2 = c32rtomb (outbuf, wc2, &state); if (n2 != (size_t)(-1)) { /* Store the translated multibyte character. */ @@ -162,8 +164,8 @@ mbmemcasecoll (const char *s1, size_t s1len, const char *s2, size_t s2len, /* Case-fold the two argument strings. 
*/ if (MB_CUR_MAX > 1) { - t1len = apply_towlower (s1, s1len, t1, t1len); - t2len = apply_towlower (s2, s2len, t2, t2len); + t1len = apply_c32tolower (s1, s1len, t1, t1len); + t2len = apply_c32tolower (s2, s2len, t2, t2len); } else { diff --git a/modules/mbmemcasecoll b/modules/mbmemcasecoll index 7401d310c9..ee52717f3e 100644 --- a/modules/mbmemcasecoll +++ b/modules/mbmemcasecoll @@ -8,12 +8,14 @@ lib/mbmemcasecoll.c Depends-on: stdbool +wchar +uchar malloca -mbrtowc -wcrtomb +mbrtoc32 +c32rtomb +c32tolower memcmp2 memcoll -wchar configure.ac: @@ -24,7 +26,9 @@ Include: "mbmemcasecoll.h" Link: +$(LTLIBUNISTRING) when linking with libtool, $(LIBUNISTRING) otherwise $(MBRTOWC_LIB) +$(LTLIBC32CONV) when linking with libtool, $(LIBC32CONV) otherwise License: GPL diff --git a/modules/mbmemcasecoll-tests b/modules/mbmemcasecoll-tests index 8fa2805872..28a982e4e6 100644 --- a/modules/mbmemcasecoll-tests +++ b/modules/mbmemcasecoll-tests @@ -25,4 +25,4 @@ TESTS_ENVIRONMENT += \ LOCALE_FR_UTF8='@LOCALE_FR_UTF8@' \ LOCALE_TR_UTF8='@LOCALE_TR_UTF8@' check_PROGRAMS += test-mbmemcasecoll -test_mbmemcasecoll_LDADD = $(LDADD) $(SETLOCALE_LIB) $(MBRTOWC_LIB) +test_mbmemcasecoll_LDADD = $(LDADD) $(SETLOCALE_LIB) $(LIBUNISTRING) $(MBRTOWC_LIB) $(LIBC32CONV)