This patch migrates the 'mbmemcasecoll' module from wchar_t to char32_t.

2023-07-02  Bruno Haible  <br...@clisp.org>

        mbmemcasecoll: Overcome wchar_t limitations.
        * lib/mbmemcasecoll.c: Include <uchar.h> instead of <wctype.h>.
        (apply_c32tolower): Renamed from apply_towlower. Use mbrtoc32 instead of
        mbrtowc. Use c32tolower instead of towlower. Use c32rtomb instead of
        wcrtomb.
        * modules/mbmemcasecoll (Depends-on): Remove mbrtowc, wcrtomb. Add
        uchar, mbrtoc32, c32rtomb, c32tolower.
        (Link): Add $(LIBUNISTRING) $(LIBC32CONV).
        * modules/mbmemcasecoll-tests (Makefile.am): Link test-mbmemcasecoll
        with $(LIBUNISTRING) $(LIBC32CONV).

diff --git a/lib/mbmemcasecoll.c b/lib/mbmemcasecoll.c
index 6f4acb7e77..de7390f6d1 100644
--- a/lib/mbmemcasecoll.c
+++ b/lib/mbmemcasecoll.c
@@ -27,11 +27,11 @@
 /* Get tolower().  */
 #include <ctype.h>
 
-/* Get mbstate_t, mbrtowc(), wcrtomb().  */
+/* Get mbstate_t.  */
 #include <wchar.h>
 
-/* Get towlower().  */
-#include <wctype.h>
+/* Get char32_t, mbrtoc32(), c32rtomb(), c32tolower().  */
+#include <uchar.h>
 
 #include "malloca.h"
 #include "memcmp2.h"
@@ -39,11 +39,11 @@
 
 #define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
 
-/* Apply towlower() to the multibyte character sequence in INBUF, storing the
+/* Apply c32tolower() to the multibyte character sequence in INBUF, storing the
    result as a multibyte character sequence in OUTBUF.  */
 static size_t
-apply_towlower (const char *inbuf, size_t inbufsize,
-                char *outbuf, size_t outbufsize)
+apply_c32tolower (const char *inbuf, size_t inbufsize,
+                  char *outbuf, size_t outbufsize)
 {
   char *outbuf_orig = outbuf;
   size_t remaining;
@@ -51,12 +51,12 @@ apply_towlower (const char *inbuf, size_t inbufsize,
   remaining = inbufsize;
   while (remaining > 0)
     {
-      wchar_t wc1;
+      char32_t wc1;
       size_t n1;
       mbstate_t state;
 
       memset (&state, '\0', sizeof (mbstate_t));
-      n1 = mbrtowc (&wc1, inbuf, remaining, &state);
+      n1 = mbrtoc32 (&wc1, inbuf, remaining, &state);
       if (n1 == (size_t)(-2))
         break;
       if (n1 != (size_t)(-1))
@@ -65,14 +65,16 @@ apply_towlower (const char *inbuf, size_t inbufsize,
 
           if (n1 == 0) /* NUL character? */
             n1 = 1;
+          else if (n1 == (size_t)(-3))
+            n1 = 0;
 
-          wc2 = towlower (wc1);
+          wc2 = c32tolower (wc1);
           if (wc2 != wc1)
             {
               size_t n2;
 
               memset (&state, '\0', sizeof (mbstate_t));
-              n2 = wcrtomb (outbuf, wc2, &state);
+              n2 = c32rtomb (outbuf, wc2, &state);
               if (n2 != (size_t)(-1))
                 {
                   /* Store the translated multibyte character.  */
@@ -162,8 +164,8 @@ mbmemcasecoll (const char *s1, size_t s1len, const char *s2, size_t s2len,
   /* Case-fold the two argument strings.  */
   if (MB_CUR_MAX > 1)
     {
-      t1len = apply_towlower (s1, s1len, t1, t1len);
-      t2len = apply_towlower (s2, s2len, t2, t2len);
+      t1len = apply_c32tolower (s1, s1len, t1, t1len);
+      t2len = apply_c32tolower (s2, s2len, t2, t2len);
     }
   else
     {
diff --git a/modules/mbmemcasecoll b/modules/mbmemcasecoll
index 7401d310c9..ee52717f3e 100644
--- a/modules/mbmemcasecoll
+++ b/modules/mbmemcasecoll
@@ -8,12 +8,14 @@ lib/mbmemcasecoll.c
 
 Depends-on:
 stdbool
+wchar
+uchar
 malloca
-mbrtowc
-wcrtomb
+mbrtoc32
+c32rtomb
+c32tolower
 memcmp2
 memcoll
-wchar
 
 configure.ac:
 
@@ -24,7 +26,9 @@ Include:
 "mbmemcasecoll.h"
 
 Link:
+$(LTLIBUNISTRING) when linking with libtool, $(LIBUNISTRING) otherwise
 $(MBRTOWC_LIB)
+$(LTLIBC32CONV) when linking with libtool, $(LIBC32CONV) otherwise
 
 License:
 GPL
diff --git a/modules/mbmemcasecoll-tests b/modules/mbmemcasecoll-tests
index 8fa2805872..28a982e4e6 100644
--- a/modules/mbmemcasecoll-tests
+++ b/modules/mbmemcasecoll-tests
@@ -25,4 +25,4 @@ TESTS_ENVIRONMENT += \
   LOCALE_FR_UTF8='@LOCALE_FR_UTF8@' \
   LOCALE_TR_UTF8='@LOCALE_TR_UTF8@'
 check_PROGRAMS += test-mbmemcasecoll
-test_mbmemcasecoll_LDADD = $(LDADD) $(SETLOCALE_LIB) $(MBRTOWC_LIB)
+test_mbmemcasecoll_LDADD = $(LDADD) $(SETLOCALE_LIB) $(LIBUNISTRING) $(MBRTOWC_LIB) $(LIBC32CONV)




Reply via email to