The function mbsrtoc32s is like mbsrtowcs, with char32_t instead of wchar_t.
These patches add the module 'mbsrtoc32s', some preparatory changes, and unit tests. 2020-01-04 Bruno Haible <br...@clisp.org> mbsrtoc32s: Add tests. * tests/test-mbsrtoc32s.c: New file, based on tests/test-mbsrtowcs.c. * tests/test-mbsrtoc32s-1.sh: New file, based on tests/test-mbsrtowcs1.sh. * tests/test-mbsrtoc32s-2.sh: New file, based on tests/test-mbsrtowcs2.sh. * tests/test-mbsrtoc32s-3.sh: New file, based on tests/test-mbsrtowcs3.sh. * tests/test-mbsrtoc32s-4.sh: New file, based on tests/test-mbsrtowcs4.sh. * modules/mbsrtoc32s-tests: New file, based on modules/mbsrtowcs-tests. mbsrtoc32s: New module. * lib/uchar.in.h (mbsrtoc32s): New declaration. * lib/mbsrtowcs-impl.h: Parameterize: Use macros FUNC, DCHAR_T, INTERNAL_STATE, MBRTOWC. * lib/mbsrtowcs.c (FUNC, DCHAR_T, INTERNAL_STATE, MBRTOWC): New macros. * lib/mbsrtoc32s.c: New file. * lib/mbsrtoc32s-state.c: New file, based on lib/mbsrtowcs-state.c. * m4/uchar.m4 (gl_UCHAR_H_DEFAULTS): Initialize GNULIB_MBSRTOC32S. * modules/uchar (Makefile.am): Substitute GNULIB_MBSRTOC32S. * modules/mbsrtoc32s: New file. * tests/test-uchar-c++.cc: Test the signature of mbsrtoc32s. * doc/posix-functions/mbsrtowcs.texi: Mention the new module. 2020-01-04 Bruno Haible <br...@clisp.org> mbrtowc, mbrtoc32: Tighten dependencies. * modules/mbrtowc (Depends-on): Disable hard-locale, mbsinit if REPLACE_MBSTATE_T is 1. (configure.ac): Don't compile lc-charset-dispatch.c and mbtowc-lock.c if REPLACE_MBSTATE_T is 0. * modules/mbrtoc32 (Depends-on): Remove hard-locale, mbsinit. Disable mbrtowc dependency if REPLACE_MBSTATE_T is 1. (configure.ac): Don't compile lc-charset-dispatch.c and mbtowc-lock.c if REPLACE_MBSTATE_T is 0. 2020-01-04 Bruno Haible <br...@clisp.org> uchar: Decide about _GL_LARGE_CHAR32_T at configure time. * m4/uchar.m4 (gl_UCHAR_H): Set SMALL_WCHAR_T. * modules/uchar (Files): Add stdint.m4. (Makefile.am): Substitute SMALL_WCHAR_T. * lib/uchar.in.h (_GL_LARGE_CHAR32_T): Rely on SMALL_WCHAR_T.
From 42883bf8230a40caf82afd5613449fa59e61165a Mon Sep 17 00:00:00 2001 From: Bruno Haible <br...@clisp.org> Date: Sat, 4 Jan 2020 15:53:35 +0100 Subject: [PATCH 1/4] uchar: Decide about _GL_LARGE_CHAR32_T at configure time. * m4/uchar.m4 (gl_UCHAR_H): Set SMALL_WCHAR_T. * modules/uchar (Files): Add stdint.m4. (Makefile.am): Substitute SMALL_WCHAR_T. * lib/uchar.in.h (_GL_LARGE_CHAR32_T): Rely on SMALL_WCHAR_T. --- ChangeLog | 8 ++++++++ lib/uchar.in.h | 2 +- m4/uchar.m4 | 12 +++++++++++- modules/uchar | 2 ++ 4 files changed, 22 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 2a1cfa2..f35de0f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,13 @@ 2020-01-04 Bruno Haible <br...@clisp.org> + uchar: Decide about _GL_LARGE_CHAR32_T at configure time. + * m4/uchar.m4 (gl_UCHAR_H): Set SMALL_WCHAR_T. + * modules/uchar (Files): Add stdint.m4. + (Makefile.am): Substitute SMALL_WCHAR_T. + * lib/uchar.in.h (_GL_LARGE_CHAR32_T): Rely on SMALL_WCHAR_T. + +2020-01-04 Bruno Haible <br...@clisp.org> + Fix AC_CHECK_DECL so that it deactivates clang's built-in declarations. Reported by Martin Storsjö <mar...@martin.st> in <https://lists.gnu.org/archive/html/bug-gnulib/2020-01/msg00016.html>. diff --git a/lib/uchar.in.h b/lib/uchar.in.h index f377cb4..5241b2e 100644 --- a/lib/uchar.in.h +++ b/lib/uchar.in.h @@ -55,7 +55,7 @@ typedef uint_least32_t char32_t; #endif /* Define if a 'char32_t' can hold more characters than a 'wchar_t'. */ -#if (defined _AIX && !defined __64BIT__) || defined _WIN32 || defined __CYGWIN__ +#if @SMALL_WCHAR_T@ /* 32-bit AIX, Cygwin, native Windows */ # define _GL_LARGE_CHAR32_T 1 #endif diff --git a/m4/uchar.m4 b/m4/uchar.m4 index 2fbe45f..4aebf5c 100644 --- a/m4/uchar.m4 +++ b/m4/uchar.m4 @@ -1,4 +1,4 @@ -# uchar.m4 serial 4 +# uchar.m4 serial 5 dnl Copyright (C) 2019-2020 Free Software Foundation, Inc. 
dnl This file is free software; the Free Software Foundation dnl gives unlimited permission to copy and/or distribute it, @@ -19,6 +19,16 @@ AC_DEFUN_ONCE([gl_UCHAR_H], fi AC_SUBST([HAVE_UCHAR_H]) + dnl Test whether a 'char32_t' can hold more characters than a 'wchar_t'. + gl_STDINT_BITSIZEOF([wchar_t], [gl_STDINT_INCLUDES]) + if test $BITSIZEOF_WCHAR_T -lt 32; then + SMALL_WCHAR_T=1 + else + SMALL_WCHAR_T=0 + fi + dnl SMALL_WCHAR_T is expected to be 1 on 32-bit AIX, Cygwin, native Windows. + AC_SUBST([SMALL_WCHAR_T]) + dnl Check for declarations of anything we want to poison if the dnl corresponding gnulib module is not in use, and which is not dnl guaranteed by C11. diff --git a/modules/uchar b/modules/uchar index f3f83ae..bd1b4e0 100644 --- a/modules/uchar +++ b/modules/uchar @@ -4,6 +4,7 @@ A GNU-like <uchar.h>. Files: lib/uchar.in.h m4/uchar.m4 +m4/stdint.m4 Depends-on: include_next @@ -26,6 +27,7 @@ uchar.h: uchar.in.h $(top_builddir)/config.status $(CXXDEFS_H) -e 's|@''PRAGMA_SYSTEM_HEADER''@|@PRAGMA_SYSTEM_HEADER@|g' \ -e 's|@''PRAGMA_COLUMNS''@|@PRAGMA_COLUMNS@|g' \ -e 's|@''NEXT_UCHAR_H''@|$(NEXT_UCHAR_H)|g' \ + -e 's|@''SMALL_WCHAR_T''@|$(SMALL_WCHAR_T)|g' \ -e 's/@''GNULIB_BTOC32''@/$(GNULIB_BTOC32)/g' \ -e 's/@''GNULIB_C32TOB''@/$(GNULIB_C32TOB)/g' \ -e 's/@''GNULIB_MBRTOC32''@/$(GNULIB_MBRTOC32)/g' \ -- 2.7.4
From 952b15d9daae382f03ab10725d01e8a4b2a55dfc Mon Sep 17 00:00:00 2001 From: Bruno Haible <br...@clisp.org> Date: Sat, 4 Jan 2020 15:54:02 +0100 Subject: [PATCH 2/4] mbrtowc, mbrtoc32: Tighten dependendies. * modules/mbrtowc (Depends-on): Disable hard-locale, mbsinit if REPLACE_MBSTATE_T is 1. (configure.ac): Don't compile lc-charset-dispatch.c and mbtowc-lock.c if REPLACE_MBSTATE_T is 0. * modules/mbrtoc32 (Depends-on): Remove hard-locale, mbsinit. Disable mbrtowc dependency if REPLACE_MBSTATE_T is 1. (configure.ac): Don't compile lc-charset-dispatch.c and mbtowc-lock.c if REPLACE_MBSTATE_T is 0. --- ChangeLog | 12 ++++++++++++ modules/mbrtoc32 | 12 ++++++------ modules/mbrtowc | 12 +++++++----- 3 files changed, 25 insertions(+), 11 deletions(-) diff --git a/ChangeLog b/ChangeLog index f35de0f..91bcb3e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,17 @@ 2020-01-04 Bruno Haible <br...@clisp.org> + mbrtowc, mbrtoc32: Tighten dependendies. + * modules/mbrtowc (Depends-on): Disable hard-locale, mbsinit if + REPLACE_MBSTATE_T is 1. + (configure.ac): Don't compile lc-charset-dispatch.c and mbtowc-lock.c if + REPLACE_MBSTATE_T is 0. + * modules/mbrtoc32 (Depends-on): Remove hard-locale, mbsinit. Disable + mbrtowc dependency if REPLACE_MBSTATE_T is 1. + (configure.ac): Don't compile lc-charset-dispatch.c and mbtowc-lock.c if + REPLACE_MBSTATE_T is 0. + +2020-01-04 Bruno Haible <br...@clisp.org> + uchar: Decide about _GL_LARGE_CHAR32_T at configure time. * m4/uchar.m4 (gl_UCHAR_H): Set SMALL_WCHAR_T. * modules/uchar (Files): Add stdint.m4. 
diff --git a/modules/mbrtoc32 b/modules/mbrtoc32 index 011b7a9..2575394 100644 --- a/modules/mbrtoc32 +++ b/modules/mbrtoc32 @@ -18,9 +18,7 @@ m4/visibility.m4 Depends-on: uchar -hard-locale [test $HAVE_MBRTOC32 = 0 || test $REPLACE_MBRTOC32 = 1] -mbrtowc [test $HAVE_MBRTOC32 = 0 || test $REPLACE_MBRTOC32 = 1] -mbsinit [test $HAVE_MBRTOC32 = 0 || test $REPLACE_MBRTOC32 = 1] +mbrtowc [{ test $HAVE_MBRTOC32 = 0 || test $REPLACE_MBRTOC32 = 1; } && test $REPLACE_MBSTATE_T = 0] localcharset [test $HAVE_MBRTOC32 = 0 || test $REPLACE_MBRTOC32 = 1] streq [test $HAVE_MBRTOC32 = 0 || test $REPLACE_MBRTOC32 = 1] verify [test $HAVE_MBRTOC32 = 0 || test $REPLACE_MBRTOC32 = 1] @@ -29,10 +27,12 @@ configure.ac: gl_FUNC_MBRTOC32 if test $HAVE_MBRTOC32 = 0 || test $REPLACE_MBRTOC32 = 1; then AC_LIBOBJ([mbrtoc32]) - AC_LIBOBJ([lc-charset-dispatch]) - AC_LIBOBJ([mbtowc-lock]) + if test $REPLACE_MBSTATE_T = 1; then + AC_LIBOBJ([lc-charset-dispatch]) + AC_LIBOBJ([mbtowc-lock]) + gl_PREREQ_MBTOWC_LOCK + fi gl_PREREQ_MBRTOC32 - gl_PREREQ_MBTOWC_LOCK fi gl_UCHAR_MODULE_INDICATOR([mbrtoc32]) diff --git a/modules/mbrtowc b/modules/mbrtowc index ee2e649..294050f 100644 --- a/modules/mbrtowc +++ b/modules/mbrtowc @@ -23,8 +23,8 @@ Depends-on: wchar extensions stdint [test $HAVE_MBRTOWC = 0 || test $REPLACE_MBRTOWC = 1] -hard-locale [test $HAVE_MBRTOWC = 0 || test $REPLACE_MBRTOWC = 1] -mbsinit [test $HAVE_MBRTOWC = 0 || test $REPLACE_MBRTOWC = 1] +hard-locale [{ test $HAVE_MBRTOWC = 0 || test $REPLACE_MBRTOWC = 1; } && test $REPLACE_MBSTATE_T = 0] +mbsinit [{ test $HAVE_MBRTOWC = 0 || test $REPLACE_MBRTOWC = 1; } && test $REPLACE_MBSTATE_T = 0] localcharset [test $HAVE_MBRTOWC = 0 || test $REPLACE_MBRTOWC = 1] streq [test $HAVE_MBRTOWC = 0 || test $REPLACE_MBRTOWC = 1] verify [test $HAVE_MBRTOWC = 0 || test $REPLACE_MBRTOWC = 1] @@ -33,10 +33,12 @@ configure.ac: gl_FUNC_MBRTOWC if test $HAVE_MBRTOWC = 0 || test $REPLACE_MBRTOWC = 1; then AC_LIBOBJ([mbrtowc]) - AC_LIBOBJ([lc-charset-dispatch]) 
- AC_LIBOBJ([mbtowc-lock]) + if test $REPLACE_MBSTATE_T = 1; then + AC_LIBOBJ([lc-charset-dispatch]) + AC_LIBOBJ([mbtowc-lock]) + gl_PREREQ_MBTOWC_LOCK + fi gl_PREREQ_MBRTOWC - gl_PREREQ_MBTOWC_LOCK fi gl_WCHAR_MODULE_INDICATOR([mbrtowc]) -- 2.7.4
From ebeebe3c464a92ad25c1f0dfbde2e542307075f0 Mon Sep 17 00:00:00 2001 From: Bruno Haible <br...@clisp.org> Date: Sat, 4 Jan 2020 16:02:31 +0100 Subject: [PATCH 3/4] mbsrtoc32s: New module. * lib/uchar.in.h (mbsrtoc32s): New declaration. * lib/mbsrtowcs-impl.h: Parameterize: Use macros FUNC, DCHAR_T, INTERNAL_STATE, MBRTOWC. * lib/mbsrtowcs.c (FUNC, DCHAR_T, INTERNAL_STATE, MBRTOWC): New macros. * lib/mbsrtoc32s.c: New file. * lib/mbsrtoc32s-state.c: New file, based on lib/mbsrtowcs-state.c. * m4/uchar.m4 (gl_UCHAR_H_DEFAULTS): Initialize GNULIB_MBSRTOC32S. * modules/uchar (Makefile.am): Substitute GNULIB_MBSRTOC32S. * modules/mbsrtoc32s: New file. * tests/test-uchar-c++.cc: Test the signature of mbsrtoc32s. * doc/posix-functions/mbsrtowcs.texi: Mention the new module. --- ChangeLog | 15 +++++++++++ doc/posix-functions/mbsrtowcs.texi | 7 +++-- lib/mbsrtoc32s-state.c | 37 ++++++++++++++++++++++++++ lib/mbsrtoc32s.c | 54 ++++++++++++++++++++++++++++++++++++++ lib/mbsrtowcs-impl.h | 10 +++---- lib/mbsrtowcs.c | 4 +++ lib/uchar.in.h | 12 +++++++++ m4/uchar.m4 | 3 ++- modules/mbsrtoc32s | 36 +++++++++++++++++++++++++ modules/uchar | 1 + tests/test-uchar-c++.cc | 5 ++++ 11 files changed, 176 insertions(+), 8 deletions(-) create mode 100644 lib/mbsrtoc32s-state.c create mode 100644 lib/mbsrtoc32s.c create mode 100644 modules/mbsrtoc32s diff --git a/ChangeLog b/ChangeLog index 91bcb3e..08cceb3 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,20 @@ 2020-01-04 Bruno Haible <br...@clisp.org> + mbsrtoc32s: New module. + * lib/uchar.in.h (mbsrtoc32s): New declaration. + * lib/mbsrtowcs-impl.h: Parameterize: Use macros FUNC, DCHAR_T, + INTERNAL_STATE, MBRTOWC. + * lib/mbsrtowcs.c (FUNC, DCHAR_T, INTERNAL_STATE, MBRTOWC): New macros. + * lib/mbsrtoc32s.c: New file. + * lib/mbsrtoc32s-state.c: New file, based on lib/mbsrtowcs-state.c. + * m4/uchar.m4 (gl_UCHAR_H_DEFAULTS): Initialize GNULIB_MBSRTOC32S. + * modules/uchar (Makefile.am): Substitute GNULIB_MBSRTOC32S. 
+ * modules/mbsrtoc32s: New file. + * tests/test-uchar-c++.cc: Test the signature of mbsrtoc32s. + * doc/posix-functions/mbsrtowcs.texi: Mention the new module. + +2020-01-04 Bruno Haible <br...@clisp.org> + mbrtowc, mbrtoc32: Tighten dependendies. * modules/mbrtowc (Depends-on): Disable hard-locale, mbsinit if REPLACE_MBSTATE_T is 1. diff --git a/doc/posix-functions/mbsrtowcs.texi b/doc/posix-functions/mbsrtowcs.texi index e6e4e5a..03c9983 100644 --- a/doc/posix-functions/mbsrtowcs.texi +++ b/doc/posix-functions/mbsrtowcs.texi @@ -22,8 +22,11 @@ mingw. Portability problems not fixed by Gnulib: @itemize @item -On Windows and 32-bit AIX platforms, @code{wchar_t} is a 16-bit type and therefore cannot -accommodate all Unicode characters. +On Windows and 32-bit AIX platforms, @code{wchar_t} is a 16-bit type and +therefore cannot accommodate all Unicode characters. +However, the Gnulib function @code{mbsrtoc32s}, provided by Gnulib module +@code{mbsrtoc32s}, operates on 32-bit wide characters and therefore does not +have this limitation. @item The specification is not clear about whether this function should update the conversion state when the first argument (the destination pointer) is NULL. diff --git a/lib/mbsrtoc32s-state.c b/lib/mbsrtoc32s-state.c new file mode 100644 index 0000000..6e041ef --- /dev/null +++ b/lib/mbsrtoc32s-state.c @@ -0,0 +1,37 @@ +/* Convert string to 32-bit wide string. + Copyright (C) 2008-2020 Free Software Foundation, Inc. + Written by Bruno Haible <br...@clisp.org>, 2020. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. */ + +#include <config.h> + +#include <wchar.h> + +/* Internal state used by the functions mbsrtoc32s() and mbsnrtoc32s(). */ +mbstate_t _gl_mbsrtoc32s_state +/* The state must initially be in the "initial state"; so, zero-initialize it. + On most systems, putting it into BSS is sufficient. Not so on Mac OS X 10.3, + see <https://lists.gnu.org/r/bug-gnulib/2009-01/msg00329.html>. + When it needs an initializer, use 0 or {0} as initializer? 0 only works + when mbstate_t is a scalar type (such as when gnulib defines it, or on + AIX, IRIX, mingw). {0} works as an initializer in all cases: for a struct + or union type, but also for a scalar type (ISO C 99, 6.7.8.(11)). */ +#if defined __ELF__ + /* On ELF systems, variables in BSS behave well. */ +#else + /* Use braces, to be on the safe side. */ + = { 0 } +#endif + ; diff --git a/lib/mbsrtoc32s.c b/lib/mbsrtoc32s.c new file mode 100644 index 0000000..432ffaf --- /dev/null +++ b/lib/mbsrtoc32s.c @@ -0,0 +1,54 @@ +/* Convert string to 32-bit wide string. + Copyright (C) 2020 Free Software Foundation, Inc. + Written by Bruno Haible <br...@clisp.org>, 2020. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. */ + +#include <config.h> + +/* Specification. 
*/ +#include <uchar.h> + +#include <wchar.h> + +#if _GL_LARGE_CHAR32_T + +# include <errno.h> +# include <limits.h> +# include <stdlib.h> + +# include "strnlen1.h" + +extern mbstate_t _gl_mbsrtoc32s_state; + +# define FUNC mbsrtoc32s +# define DCHAR_T char32_t +# define INTERNAL_STATE _gl_mbsrtoc32s_state +# define MBRTOWC mbrtoc32 +# include "mbsrtowcs-impl.h" + +#else +/* char32_t and wchar_t are equivalent. */ + +# include "verify.h" + +verify (sizeof (char32_t) == sizeof (wchar_t)); + +size_t +mbsrtoc32s (char32_t *dest, const char **srcp, size_t len, mbstate_t *ps) +{ + return mbsrtowcs ((wchar_t *) dest, srcp, len, ps); +} + +#endif diff --git a/lib/mbsrtowcs-impl.h b/lib/mbsrtowcs-impl.h index 053417b..06ecec2 100644 --- a/lib/mbsrtowcs-impl.h +++ b/lib/mbsrtowcs-impl.h @@ -16,16 +16,16 @@ along with this program. If not, see <https://www.gnu.org/licenses/>. */ size_t -mbsrtowcs (wchar_t *dest, const char **srcp, size_t len, mbstate_t *ps) +FUNC (DCHAR_T *dest, const char **srcp, size_t len, mbstate_t *ps) { if (ps == NULL) - ps = &_gl_mbsrtowcs_state; + ps = &INTERNAL_STATE; { const char *src = *srcp; if (dest != NULL) { - wchar_t *destptr = dest; + DCHAR_T *destptr = dest; for (; len > 0; destptr++, len--) { @@ -46,7 +46,7 @@ mbsrtowcs (wchar_t *dest, const char **srcp, size_t len, mbstate_t *ps) src_avail = 4 + strnlen1 (src + 4, MB_LEN_MAX - 4); /* Parse the next multibyte character. */ - ret = mbrtowc (destptr, src, src_avail, ps); + ret = MBRTOWC (destptr, src, src_avail, ps); if (ret == (size_t)(-2)) /* Encountered a multibyte character that extends past a '\0' byte @@ -93,7 +93,7 @@ mbsrtowcs (wchar_t *dest, const char **srcp, size_t len, mbstate_t *ps) src_avail = 4 + strnlen1 (src + 4, MB_LEN_MAX - 4); /* Parse the next multibyte character. 
*/ - ret = mbrtowc (NULL, src, src_avail, &state); + ret = MBRTOWC (NULL, src, src_avail, &state); if (ret == (size_t)(-2)) /* Encountered a multibyte character that extends past a '\0' byte diff --git a/lib/mbsrtowcs.c b/lib/mbsrtowcs.c index 0c164e7..6d1c685 100644 --- a/lib/mbsrtowcs.c +++ b/lib/mbsrtowcs.c @@ -29,4 +29,8 @@ extern mbstate_t _gl_mbsrtowcs_state; +#define FUNC mbsrtowcs +#define DCHAR_T wchar_t +#define INTERNAL_STATE _gl_mbsrtowcs_state +#define MBRTOWC mbrtowc #include "mbsrtowcs-impl.h" diff --git a/lib/uchar.in.h b/lib/uchar.in.h index 5241b2e..318cf8e 100644 --- a/lib/uchar.in.h +++ b/lib/uchar.in.h @@ -107,4 +107,16 @@ _GL_WARN_ON_USE (mbrtoc32, "mbrtoc32 is not portable - " #endif +/* Convert a string to a 32-bit wide string. */ +#if @GNULIB_MBSRTOC32S@ +_GL_FUNCDECL_SYS (mbsrtoc32s, size_t, + (char32_t *dest, const char **srcp, size_t len, mbstate_t *ps) + _GL_ARG_NONNULL ((2))); +_GL_CXXALIAS_SYS (mbsrtoc32s, size_t, + (char32_t *dest, const char **srcp, size_t len, + mbstate_t *ps)); +_GL_CXXALIASWARN (mbsrtoc32s); +#endif + + #endif /* _@GUARD_PREFIX@_UCHAR_H */ diff --git a/m4/uchar.m4 b/m4/uchar.m4 index 4aebf5c..e92f5d6 100644 --- a/m4/uchar.m4 +++ b/m4/uchar.m4 @@ -1,4 +1,4 @@ -# uchar.m4 serial 5 +# uchar.m4 serial 6 dnl Copyright (C) 2019-2020 Free Software Foundation, Inc. dnl This file is free software; the Free Software Foundation dnl gives unlimited permission to copy and/or distribute it, @@ -50,6 +50,7 @@ AC_DEFUN([gl_UCHAR_H_DEFAULTS], GNULIB_BTOC32=0; AC_SUBST([GNULIB_BTOC32]) GNULIB_C32TOB=0; AC_SUBST([GNULIB_C32TOB]) GNULIB_MBRTOC32=0; AC_SUBST([GNULIB_MBRTOC32]) + GNULIB_MBSRTOC32S=0; AC_SUBST([GNULIB_MBSRTOC32S]) dnl Assume proper GNU behavior unless another module says otherwise. 
HAVE_MBRTOC32=1; AC_SUBST([HAVE_MBRTOC32]) REPLACE_MBRTOC32=0; AC_SUBST([REPLACE_MBRTOC32]) diff --git a/modules/mbsrtoc32s b/modules/mbsrtoc32s new file mode 100644 index 0000000..66b578e --- /dev/null +++ b/modules/mbsrtoc32s @@ -0,0 +1,36 @@ +Description: +mbsrtoc32s() function: convert string to 32-bit wide string. + +Files: +lib/mbsrtoc32s.c +lib/mbsrtowcs-impl.h +lib/mbsrtoc32s-state.c + +Depends-on: +uchar +wchar +verify +mbrtoc32 [test $SMALL_WCHAR_T = 1] +strnlen1 [test $SMALL_WCHAR_T = 1] +mbsrtowcs [test $SMALL_WCHAR_T = 0] + +configure.ac: +if test $SMALL_WCHAR_T = 1; then + AC_LIBOBJ([mbsrtoc32s-state]) +fi +gl_UCHAR_MODULE_INDICATOR([mbsrtoc32s]) + +Makefile.am: +lib_SOURCES += mbsrtoc32s.c + +Include: +<uchar.h> + +Link: +$(LIB_MBRTOWC) + +License: +LGPLv2+ + +Maintainer: +Bruno Haible diff --git a/modules/uchar b/modules/uchar index bd1b4e0..03101c1 100644 --- a/modules/uchar +++ b/modules/uchar @@ -31,6 +31,7 @@ uchar.h: uchar.in.h $(top_builddir)/config.status $(CXXDEFS_H) -e 's/@''GNULIB_BTOC32''@/$(GNULIB_BTOC32)/g' \ -e 's/@''GNULIB_C32TOB''@/$(GNULIB_C32TOB)/g' \ -e 's/@''GNULIB_MBRTOC32''@/$(GNULIB_MBRTOC32)/g' \ + -e 's/@''GNULIB_MBSRTOC32S''@/$(GNULIB_MBSRTOC32S)/g' \ -e 's|@''HAVE_MBRTOC32''@|$(HAVE_MBRTOC32)|g' \ -e 's|@''REPLACE_MBRTOC32''@|$(REPLACE_MBRTOC32)|g' \ -e '/definitions of _GL_FUNCDECL_RPL/r $(CXXDEFS_H)' \ diff --git a/tests/test-uchar-c++.cc b/tests/test-uchar-c++.cc index 56a3a25..a7132a1 100644 --- a/tests/test-uchar-c++.cc +++ b/tests/test-uchar-c++.cc @@ -37,6 +37,11 @@ SIGNATURE_CHECK (GNULIB_NAMESPACE::mbrtoc32, size_t, (char32_t *, const char *, size_t, mbstate_t *)); #endif +#if GNULIB_TEST_MBSRTOC32S +SIGNATURE_CHECK (GNULIB_NAMESPACE::mbsrtoc32s, size_t, + (char32_t *, const char **, size_t, mbstate_t *)); +#endif + int main () -- 2.7.4
From 8145ea17f51d216263209087ff25088d8fadffde Mon Sep 17 00:00:00 2001 From: Bruno Haible <br...@clisp.org> Date: Sat, 4 Jan 2020 16:03:10 +0100 Subject: [PATCH 4/4] mbsrtoc32s: Add tests. * tests/test-mbsrtoc32s.c: New file, based on tests/test-mbsrtowcs.c. * tests/test-mbsrtoc32s-1.sh: New file, based on tests/test-mbsrtowcs1.sh. * tests/test-mbsrtoc32s-2.sh: New file, based on tests/test-mbsrtowcs2.sh. * tests/test-mbsrtoc32s-3.sh: New file, based on tests/test-mbsrtowcs3.sh. * tests/test-mbsrtoc32s-4.sh: New file, based on tests/test-mbsrtowcs4.sh. * modules/mbsrtoc32s-tests: New file, based on modules/mbsrtowcs-tests. --- ChangeLog | 12 ++ modules/mbsrtoc32s-tests | 34 ++++++ tests/test-mbsrtoc32s-1.sh | 15 +++ tests/test-mbsrtoc32s-2.sh | 15 +++ tests/test-mbsrtoc32s-3.sh | 15 +++ tests/test-mbsrtoc32s-4.sh | 15 +++ tests/test-mbsrtoc32s.c | 293 +++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 399 insertions(+) create mode 100644 modules/mbsrtoc32s-tests create mode 100755 tests/test-mbsrtoc32s-1.sh create mode 100755 tests/test-mbsrtoc32s-2.sh create mode 100755 tests/test-mbsrtoc32s-3.sh create mode 100755 tests/test-mbsrtoc32s-4.sh create mode 100644 tests/test-mbsrtoc32s.c diff --git a/ChangeLog b/ChangeLog index 08cceb3..ecf3650 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,17 @@ 2020-01-04 Bruno Haible <br...@clisp.org> + mbsrtoc32s: Add tests. + * tests/test-mbsrtoc32s.c: New file, based on tests/test-mbsrtowcs.c. + * tests/test-mbsrtoc32s-1.sh: New file, based on + tests/test-mbsrtowcs1.sh. + * tests/test-mbsrtoc32s-2.sh: New file, based on + tests/test-mbsrtowcs2.sh. + * tests/test-mbsrtoc32s-3.sh: New file, based on + tests/test-mbsrtowcs3.sh. + * tests/test-mbsrtoc32s-4.sh: New file, based on + tests/test-mbsrtowcs4.sh. + * modules/mbsrtoc32s-tests: New file, based on modules/mbsrtowcs-tests. + mbsrtoc32s: New module. * lib/uchar.in.h (mbsrtoc32s): New declaration. 
* lib/mbsrtowcs-impl.h: Parameterize: Use macros FUNC, DCHAR_T, diff --git a/modules/mbsrtoc32s-tests b/modules/mbsrtoc32s-tests new file mode 100644 index 0000000..dd1881e --- /dev/null +++ b/modules/mbsrtoc32s-tests @@ -0,0 +1,34 @@ +Files: +tests/test-mbsrtoc32s-1.sh +tests/test-mbsrtoc32s-2.sh +tests/test-mbsrtoc32s-3.sh +tests/test-mbsrtoc32s-4.sh +tests/test-mbsrtoc32s.c +tests/signature.h +tests/macros.h +m4/locale-fr.m4 +m4/locale-ja.m4 +m4/locale-zh.m4 +m4/codeset.m4 + +Depends-on: +mbrtoc32 +mbsinit +c32tob +setlocale + +configure.ac: +gt_LOCALE_FR +gt_LOCALE_FR_UTF8 +gt_LOCALE_JA +gt_LOCALE_ZH_CN + +Makefile.am: +TESTS += test-mbsrtoc32s-1.sh test-mbsrtoc32s-2.sh test-mbsrtoc32s-3.sh test-mbsrtoc32s-4.sh +TESTS_ENVIRONMENT += \ + LOCALE_FR='@LOCALE_FR@' \ + LOCALE_FR_UTF8='@LOCALE_FR_UTF8@' \ + LOCALE_JA='@LOCALE_JA@' \ + LOCALE_ZH_CN='@LOCALE_ZH_CN@' +check_PROGRAMS += test-mbsrtoc32s +test_mbsrtoc32s_LDADD = $(LDADD) $(LIB_SETLOCALE) $(LIB_MBRTOWC) diff --git a/tests/test-mbsrtoc32s-1.sh b/tests/test-mbsrtoc32s-1.sh new file mode 100755 index 0000000..c957b80 --- /dev/null +++ b/tests/test-mbsrtoc32s-1.sh @@ -0,0 +1,15 @@ +#!/bin/sh + +# Test in an ISO-8859-1 or ISO-8859-15 locale. +: ${LOCALE_FR=fr_FR} +if test $LOCALE_FR = none; then + if test -f /usr/bin/localedef; then + echo "Skipping test: no traditional french locale is installed" + else + echo "Skipping test: no traditional french locale is supported" + fi + exit 77 +fi + +LC_ALL=$LOCALE_FR \ +${CHECKER} ./test-mbsrtoc32s${EXEEXT} 1 diff --git a/tests/test-mbsrtoc32s-2.sh b/tests/test-mbsrtoc32s-2.sh new file mode 100755 index 0000000..bb893d1 --- /dev/null +++ b/tests/test-mbsrtoc32s-2.sh @@ -0,0 +1,15 @@ +#!/bin/sh + +# Test whether a specific UTF-8 locale is installed. 
+: ${LOCALE_FR_UTF8=fr_FR.UTF-8} +if test $LOCALE_FR_UTF8 = none; then + if test -f /usr/bin/localedef; then + echo "Skipping test: no french Unicode locale is installed" + else + echo "Skipping test: no french Unicode locale is supported" + fi + exit 77 +fi + +LC_ALL=$LOCALE_FR_UTF8 \ +${CHECKER} ./test-mbsrtoc32s${EXEEXT} 2 diff --git a/tests/test-mbsrtoc32s-3.sh b/tests/test-mbsrtoc32s-3.sh new file mode 100755 index 0000000..5573412 --- /dev/null +++ b/tests/test-mbsrtoc32s-3.sh @@ -0,0 +1,15 @@ +#!/bin/sh + +# Test whether a specific EUC-JP locale is installed. +: ${LOCALE_JA=ja_JP} +if test $LOCALE_JA = none; then + if test -f /usr/bin/localedef; then + echo "Skipping test: no traditional japanese locale is installed" + else + echo "Skipping test: no traditional japanese locale is supported" + fi + exit 77 +fi + +LC_ALL=$LOCALE_JA \ +${CHECKER} ./test-mbsrtoc32s${EXEEXT} 3 diff --git a/tests/test-mbsrtoc32s-4.sh b/tests/test-mbsrtoc32s-4.sh new file mode 100755 index 0000000..faf4600 --- /dev/null +++ b/tests/test-mbsrtoc32s-4.sh @@ -0,0 +1,15 @@ +#!/bin/sh + +# Test whether a specific GB18030 locale is installed. +: ${LOCALE_ZH_CN=zh_CN.GB18030} +if test $LOCALE_ZH_CN = none; then + if test -f /usr/bin/localedef; then + echo "Skipping test: no transitional chinese locale is installed" + else + echo "Skipping test: no transitional chinese locale is supported" + fi + exit 77 +fi + +LC_ALL=$LOCALE_ZH_CN \ +${CHECKER} ./test-mbsrtoc32s${EXEEXT} 4 diff --git a/tests/test-mbsrtoc32s.c b/tests/test-mbsrtoc32s.c new file mode 100644 index 0000000..cd99416 --- /dev/null +++ b/tests/test-mbsrtoc32s.c @@ -0,0 +1,293 @@ +/* Test of conversion of string to 32-bit wide string. + Copyright (C) 2008-2020 Free Software Foundation, Inc. 
+ + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. */ + +/* Written by Bruno Haible <br...@clisp.org>, 2008. */ + +#include <config.h> + +#include <uchar.h> + +#include "signature.h" +SIGNATURE_CHECK (mbsrtoc32s, size_t, (char32_t *, char const **, size_t, + mbstate_t *)); + +#include <locale.h> +#include <stdio.h> +#include <string.h> + +#include "macros.h" + +int +main (int argc, char *argv[]) +{ + mbstate_t state; + char32_t wc; + size_t ret; + + /* configure should already have checked that the locale is supported. */ + if (setlocale (LC_ALL, "") == NULL) + return 1; + + /* Test NUL byte input. 
*/ + { + const char *src; + + memset (&state, '\0', sizeof (mbstate_t)); + + src = ""; + ret = mbsrtoc32s (NULL, &src, 0, &state); + ASSERT (ret == 0); + ASSERT (mbsinit (&state)); + + src = ""; + ret = mbsrtoc32s (NULL, &src, 1, &state); + ASSERT (ret == 0); + ASSERT (mbsinit (&state)); + + wc = (char32_t) 0xBADFACE; + src = ""; + ret = mbsrtoc32s (&wc, &src, 0, &state); + ASSERT (ret == 0); + ASSERT (wc == (char32_t) 0xBADFACE); + ASSERT (mbsinit (&state)); + + wc = (char32_t) 0xBADFACE; + src = ""; + ret = mbsrtoc32s (&wc, &src, 1, &state); + ASSERT (ret == 0); + ASSERT (wc == 0); + ASSERT (mbsinit (&state)); + } + + if (argc > 1) + { + int unlimited; + + for (unlimited = 0; unlimited < 2; unlimited++) + { + #define BUFSIZE 10 + char32_t buf[BUFSIZE]; + const char *src; + mbstate_t temp_state; + + { + size_t i; + for (i = 0; i < BUFSIZE; i++) + buf[i] = (char32_t) 0xBADFACE; + } + + switch (argv[1][0]) + { + case '1': + /* Locale encoding is ISO-8859-1 or ISO-8859-15. */ + { + char input[] = "B\374\337er"; /* "Büßer" */ + memset (&state, '\0', sizeof (mbstate_t)); + + wc = (char32_t) 0xBADFACE; + ret = mbrtoc32 (&wc, input, 1, &state); + ASSERT (ret == 1); + ASSERT (wc == 'B'); + ASSERT (mbsinit (&state)); + input[0] = '\0'; + + wc = (char32_t) 0xBADFACE; + ret = mbrtoc32 (&wc, input + 1, 1, &state); + ASSERT (ret == 1); + ASSERT (c32tob (wc) == (unsigned char) '\374'); + ASSERT (mbsinit (&state)); + input[1] = '\0'; + + src = input + 2; + temp_state = state; + ret = mbsrtoc32s (NULL, &src, unlimited ? BUFSIZE : 1, &temp_state); + ASSERT (ret == 3); + ASSERT (src == input + 2); + ASSERT (mbsinit (&state)); + + src = input + 2; + ret = mbsrtoc32s (buf, &src, unlimited ? BUFSIZE : 1, &state); + ASSERT (ret == (unlimited ? 3 : 1)); + ASSERT (src == (unlimited ? 
NULL : input + 3)); + ASSERT (c32tob (buf[0]) == (unsigned char) '\337'); + if (unlimited) + { + ASSERT (buf[1] == 'e'); + ASSERT (buf[2] == 'r'); + ASSERT (buf[3] == 0); + ASSERT (buf[4] == (char32_t) 0xBADFACE); + } + else + ASSERT (buf[1] == (char32_t) 0xBADFACE); + ASSERT (mbsinit (&state)); + } + break; + + case '2': + /* Locale encoding is UTF-8. */ + { + char input[] = "B\303\274\303\237er"; /* "Büßer" */ + memset (&state, '\0', sizeof (mbstate_t)); + + wc = (char32_t) 0xBADFACE; + ret = mbrtoc32 (&wc, input, 1, &state); + ASSERT (ret == 1); + ASSERT (wc == 'B'); + ASSERT (mbsinit (&state)); + input[0] = '\0'; + + wc = (char32_t) 0xBADFACE; + ret = mbrtoc32 (&wc, input + 1, 1, &state); + ASSERT (ret == (size_t)(-2)); + ASSERT (wc == (char32_t) 0xBADFACE); + ASSERT (!mbsinit (&state)); + input[1] = '\0'; + + src = input + 2; + temp_state = state; + ret = mbsrtoc32s (NULL, &src, unlimited ? BUFSIZE : 2, &temp_state); + ASSERT (ret == 4); + ASSERT (src == input + 2); + ASSERT (!mbsinit (&state)); + + src = input + 2; + ret = mbsrtoc32s (buf, &src, unlimited ? BUFSIZE : 2, &state); + ASSERT (ret == (unlimited ? 4 : 2)); + ASSERT (src == (unlimited ? NULL : input + 5)); + ASSERT (c32tob (buf[0]) == EOF); + ASSERT (c32tob (buf[1]) == EOF); + if (unlimited) + { + ASSERT (buf[2] == 'e'); + ASSERT (buf[3] == 'r'); + ASSERT (buf[4] == 0); + ASSERT (buf[5] == (char32_t) 0xBADFACE); + } + else + ASSERT (buf[2] == (char32_t) 0xBADFACE); + ASSERT (mbsinit (&state)); + } + break; + + case '3': + /* Locale encoding is EUC-JP. 
*/ + { + char input[] = "<\306\374\313\334\270\354>"; /* "<日本語>" */ + memset (&state, '\0', sizeof (mbstate_t)); + + wc = (char32_t) 0xBADFACE; + ret = mbrtoc32 (&wc, input, 1, &state); + ASSERT (ret == 1); + ASSERT (wc == '<'); + ASSERT (mbsinit (&state)); + input[0] = '\0'; + + wc = (char32_t) 0xBADFACE; + ret = mbrtoc32 (&wc, input + 1, 2, &state); + ASSERT (ret == 2); + ASSERT (c32tob (wc) == EOF); + ASSERT (mbsinit (&state)); + input[1] = '\0'; + input[2] = '\0'; + + wc = (char32_t) 0xBADFACE; + ret = mbrtoc32 (&wc, input + 3, 1, &state); + ASSERT (ret == (size_t)(-2)); + ASSERT (wc == (char32_t) 0xBADFACE); + ASSERT (!mbsinit (&state)); + input[3] = '\0'; + + src = input + 4; + temp_state = state; + ret = mbsrtoc32s (NULL, &src, unlimited ? BUFSIZE : 2, &temp_state); + ASSERT (ret == 3); + ASSERT (src == input + 4); + ASSERT (!mbsinit (&state)); + + src = input + 4; + ret = mbsrtoc32s (buf, &src, unlimited ? BUFSIZE : 2, &state); + ASSERT (ret == (unlimited ? 3 : 2)); + ASSERT (src == (unlimited ? NULL : input + 7)); + ASSERT (c32tob (buf[0]) == EOF); + ASSERT (c32tob (buf[1]) == EOF); + if (unlimited) + { + ASSERT (buf[2] == '>'); + ASSERT (buf[3] == 0); + ASSERT (buf[4] == (char32_t) 0xBADFACE); + } + else + ASSERT (buf[2] == (char32_t) 0xBADFACE); + ASSERT (mbsinit (&state)); + } + break; + + case '4': + /* Locale encoding is GB18030. */ + { + char input[] = "B\250\271\201\060\211\070er"; /* "Büßer" */ + memset (&state, '\0', sizeof (mbstate_t)); + + wc = (char32_t) 0xBADFACE; + ret = mbrtoc32 (&wc, input, 1, &state); + ASSERT (ret == 1); + ASSERT (wc == 'B'); + ASSERT (mbsinit (&state)); + input[0] = '\0'; + + wc = (char32_t) 0xBADFACE; + ret = mbrtoc32 (&wc, input + 1, 1, &state); + ASSERT (ret == (size_t)(-2)); + ASSERT (wc == (char32_t) 0xBADFACE); + ASSERT (!mbsinit (&state)); + input[1] = '\0'; + + src = input + 2; + temp_state = state; + ret = mbsrtoc32s (NULL, &src, unlimited ? 
BUFSIZE : 2, &temp_state); + ASSERT (ret == 4); + ASSERT (src == input + 2); + ASSERT (!mbsinit (&state)); + + src = input + 2; + ret = mbsrtoc32s (buf, &src, unlimited ? BUFSIZE : 2, &state); + ASSERT (ret == (unlimited ? 4 : 2)); + ASSERT (src == (unlimited ? NULL : input + 7)); + ASSERT (c32tob (buf[0]) == EOF); + ASSERT (c32tob (buf[1]) == EOF); + if (unlimited) + { + ASSERT (buf[2] == 'e'); + ASSERT (buf[3] == 'r'); + ASSERT (buf[4] == 0); + ASSERT (buf[5] == (char32_t) 0xBADFACE); + } + else + ASSERT (buf[2] == (char32_t) 0xBADFACE); + ASSERT (mbsinit (&state)); + } + break; + + default: + return 1; + } + } + + return 0; + } + + return 1; +} -- 2.7.4