I did: > 2024-05-23 Bruno Haible <br...@clisp.org> > > mbrtoc32: Strengthen tests. > * tests/test-mbrtoc32.c (main): Add tests for one-by-one input in the > UTF-8 and GB18030 encodings.
This new test fails on NetBSD 10.0, reported by the CI. In this case, mbrtoc32 returns (size_t)-2 because mbrtowc returns (size_t)-2, and the latter is a bug in NetBSD's GB18030 locale support. So, here is a patch that - adds the same test to the mbrtowc unit test, - disables testing in this locale on NetBSD. 2024-05-27 Bruno Haible <br...@clisp.org> tests: Don't test on the broken NetBSD 10.0 zh_CN.GB18030 locale. * tests/test-mbrtowc.c (main): Add tests for one-by-one input in the UTF-8 and GB18030 encodings. * m4/locale-zh.m4 (gt_LOCALE_ZH_CN): Add a sanity check with mbrtowc. diff --git a/m4/locale-zh.m4 b/m4/locale-zh.m4 index 7f1a10be83..fb9f26ab9f 100644 --- a/m4/locale-zh.m4 +++ b/m4/locale-zh.m4 @@ -1,5 +1,5 @@ # locale-zh.m4 -# serial 18 +# serial 19 dnl Copyright (C) 2003, 2005-2024 Free Software Foundation, Inc. dnl This file is free software; the Free Software Foundation dnl gives unlimited permission to copy and/or distribute it, @@ -22,6 +22,7 @@ AC_DEFUN_ONCE([gt_LOCALE_ZH_CN] #endif #include <stdlib.h> #include <string.h> +#include <wchar.h> struct tm t; char buf[16]; int main () @@ -80,6 +81,19 @@ AC_DEFUN_ONCE([gt_LOCALE_ZH_CN] single wide character. This excludes the GB2312 and GBK encodings. */ if (mblen ("\203\062\332\066", 5) != 4) return 1; + /* Check whether mbrtowc accept this character one byte at a time. + This excludes NetBSD 10.0. 
*/ + if (sizeof (wchar_t) > 2) + { + wchar_t wc; + mbstate_t state; + memset (&state, 0, sizeof (state)); + if (!(mbrtowc (&wc, "\203", 1, &state) == (size_t)(-2) + && mbrtowc (&wc, "\062", 1, &state) == (size_t)(-2) + && mbrtowc (&wc, "\332", 1, &state) == (size_t)(-2) + && mbrtowc (&wc, "\066", 1, &state) == 1)) + return 1; + } return 0; #endif } diff --git a/tests/test-mbrtowc.c b/tests/test-mbrtowc.c index f506e77544..63600a1109 100644 --- a/tests/test-mbrtowc.c +++ b/tests/test-mbrtowc.c @@ -271,6 +271,34 @@ main (int argc, char *argv[]) ASSERT (wc == 'r'); ASSERT (mbsinit (&state)); } + if (sizeof (wchar_t) > 2) + { /* \360\237\220\203 = U+0001F403 */ + memset (&state, '\0', sizeof (mbstate_t)); + + wc = (wchar_t) 0xBADFACE; + ret = mbrtowc (&wc, "\360", 1, &state); + ASSERT (ret == (size_t)(-2)); + ASSERT (wc == (wchar_t) 0xBADFACE); + ASSERT (!mbsinit (&state)); + + wc = (wchar_t) 0xBADFACE; + ret = mbrtowc (&wc, "\237", 1, &state); + ASSERT (ret == (size_t)(-2)); + ASSERT (wc == (wchar_t) 0xBADFACE); + ASSERT (!mbsinit (&state)); + + wc = (wchar_t) 0xBADFACE; + ret = mbrtowc (&wc, "\220", 1, &state); + ASSERT (ret == (size_t)(-2)); + ASSERT (wc == (wchar_t) 0xBADFACE); + ASSERT (!mbsinit (&state)); + + wc = (wchar_t) 0xBADFACE; + ret = mbrtowc (&wc, "\203", 1, &state); + ASSERT (ret == 1); + ASSERT (wctob (wc) == EOF); + ASSERT (mbsinit (&state)); + } return test_exit_status; case '4': @@ -384,6 +412,34 @@ main (int argc, char *argv[]) ASSERT (wc == 'r'); ASSERT (mbsinit (&state)); } + if (sizeof (wchar_t) > 2) + { /* \224\071\311\067 = U+0001F403 */ + memset (&state, '\0', sizeof (mbstate_t)); + + wc = (wchar_t) 0xBADFACE; + ret = mbrtowc (&wc, "\224", 1, &state); + ASSERT (ret == (size_t)(-2)); + ASSERT (wc == (wchar_t) 0xBADFACE); + ASSERT (!mbsinit (&state)); + + wc = (wchar_t) 0xBADFACE; + ret = mbrtowc (&wc, "\071", 1, &state); + ASSERT (ret == (size_t)(-2)); + ASSERT (wc == (wchar_t) 0xBADFACE); + ASSERT (!mbsinit (&state)); + + wc = (wchar_t) 
0xBADFACE; + ret = mbrtowc (&wc, "\311", 1, &state); + ASSERT (ret == (size_t)(-2)); + ASSERT (wc == (wchar_t) 0xBADFACE); + ASSERT (!mbsinit (&state)); + + wc = (wchar_t) 0xBADFACE; + ret = mbrtowc (&wc, "\067", 1, &state); + ASSERT (ret == 1); + ASSERT (wctob (wc) == EOF); + ASSERT (mbsinit (&state)); + } return test_exit_status; }