The function c32stombs is like wcstombs, with 'wchar_t' replaced by 'char32_t'.
2020-01-12  Bruno Haible  <br...@clisp.org>

	c32stombs: Add tests.
	* tests/test-c32stombs.c: New file, based on tests/test-c32srtombs.c.
	* tests/test-c32stombs-1.sh: New file, based on
	tests/test-c32srtombs-1.sh.
	* tests/test-c32stombs-2.sh: New file, based on
	tests/test-c32srtombs-2.sh.
	* tests/test-c32stombs-3.sh: New file, based on
	tests/test-c32srtombs-3.sh.
	* tests/test-c32stombs-4.sh: New file, based on
	tests/test-c32srtombs-4.sh.
	* modules/c32stombs-tests: New file, based on modules/c32srtombs-tests.

	c32stombs: New module.
	* lib/uchar.in.h (c32stombs): New declaration.
	* lib/c32stombs.c: New file.
	* m4/uchar.m4 (gl_UCHAR_H_DEFAULTS): Initialize GNULIB_C32STOMBS.
	* modules/uchar (Makefile.am): Substitute GNULIB_C32STOMBS.
	* modules/c32stombs: New file.
	* tests/test-uchar-c++.cc: Test the signature of c32stombs.
	* doc/posix-functions/wcstombs.texi: Mention the new module.
>From 0296188c2fea8f448b846e5510b8f7873bb79b53 Mon Sep 17 00:00:00 2001 From: Bruno Haible <br...@clisp.org> Date: Mon, 13 Jan 2020 00:16:48 +0100 Subject: [PATCH 1/2] c32stombs: New module. * lib/uchar.in.h (c32stombs): New declaration. * lib/c32stombs.c: New file. * m4/uchar.m4 (gl_UCHAR_H_DEFAULTS): Initialize GNULIB_C32STOMBS. * modules/uchar (Makefile.am): Substitute GNULIB_C32STOMBS. * modules/c32stombs: New file. * tests/test-uchar-c++.cc: Test the signature of c32stombs. * doc/posix-functions/wcstombs.texi: Mention the new module. --- ChangeLog | 11 +++++++++++ doc/posix-functions/wcstombs.texi | 7 +++++-- lib/c32stombs.c | 33 +++++++++++++++++++++++++++++++++ lib/uchar.in.h | 11 +++++++++++ m4/uchar.m4 | 3 ++- modules/c32stombs | 25 +++++++++++++++++++++++++ modules/uchar | 1 + tests/test-uchar-c++.cc | 5 +++++ 8 files changed, 93 insertions(+), 3 deletions(-) create mode 100644 lib/c32stombs.c create mode 100644 modules/c32stombs diff --git a/ChangeLog b/ChangeLog index 9d90e3c..c2f4f24 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,14 @@ +2020-01-12 Bruno Haible <br...@clisp.org> + + c32stombs: New module. + * lib/uchar.in.h (c32stombs): New declaration. + * lib/c32stombs.c: New file. + * m4/uchar.m4 (gl_UCHAR_H_DEFAULTS): Initialize GNULIB_C32STOMBS. + * modules/uchar (Makefile.am): Substitute GNULIB_C32STOMBS. + * modules/c32stombs: New file. + * tests/test-uchar-c++.cc: Test the signature of c32stombs. + * doc/posix-functions/wcstombs.texi: Mention the new module. 
+ 2020-01-11 Jim Meyering <meyer...@fb.com> perl: require the "warnings" module diff --git a/doc/posix-functions/wcstombs.texi b/doc/posix-functions/wcstombs.texi index 2bca1cc..dd8492e 100644 --- a/doc/posix-functions/wcstombs.texi +++ b/doc/posix-functions/wcstombs.texi @@ -13,6 +13,9 @@ Portability problems fixed by Gnulib: Portability problems not fixed by Gnulib: @itemize @item -On Windows and 32-bit AIX platforms, @code{wchar_t} is a 16-bit type and therefore cannot -accommodate all Unicode characters. +On Windows and 32-bit AIX platforms, @code{wchar_t} is a 16-bit type and +therefore cannot accommodate all Unicode characters. +However, the Gnulib function @code{c32stombs}, provided by Gnulib module +@code{c32stombs}, operates on 32-bit wide characters and therefore does not +have this limitation. @end itemize diff --git a/lib/c32stombs.c b/lib/c32stombs.c new file mode 100644 index 0000000..01f7081 --- /dev/null +++ b/lib/c32stombs.c @@ -0,0 +1,33 @@ +/* Convert 32-bit wide string to string. + Copyright (C) 2020 Free Software Foundation, Inc. + Written by Bruno Haible <br...@clisp.org>, 2020. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. */ + +#include <config.h> + +/* Specification. 
*/ +#include <uchar.h> + +#include <string.h> +#include <wchar.h> + +size_t +c32stombs (char *dest, const char32_t *src, size_t len) +{ + mbstate_t state; + + memset (&state, '\0', sizeof (mbstate_t)); + return c32srtombs (dest, &src, len, &state); +} diff --git a/lib/uchar.in.h b/lib/uchar.in.h index a149f5a..5cf573d 100644 --- a/lib/uchar.in.h +++ b/lib/uchar.in.h @@ -118,6 +118,17 @@ _GL_CXXALIASWARN (c32srtombs); #endif +/* Convert a 32-bit wide string to a string. */ +#if @GNULIB_C32STOMBS@ +_GL_FUNCDECL_SYS (c32stombs, size_t, + (char *dest, const char32_t *src, size_t len) + _GL_ARG_NONNULL ((2))); +_GL_CXXALIAS_SYS (c32stombs, size_t, + (char *dest, const char32_t *src, size_t len)); +_GL_CXXALIASWARN (c32stombs); +#endif + + /* Converts a 32-bit wide character to unibyte character. Returns the single-byte representation of WC if it exists, or EOF otherwise. */ diff --git a/m4/uchar.m4 b/m4/uchar.m4 index 2a84bd1..b5edc8c 100644 --- a/m4/uchar.m4 +++ b/m4/uchar.m4 @@ -1,4 +1,4 @@ -# uchar.m4 serial 11 +# uchar.m4 serial 12 dnl Copyright (C) 2019-2020 Free Software Foundation, Inc. dnl This file is free software; the Free Software Foundation dnl gives unlimited permission to copy and/or distribute it, @@ -51,6 +51,7 @@ AC_DEFUN([gl_UCHAR_H_DEFAULTS], GNULIB_C32RTOMB=0; AC_SUBST([GNULIB_C32RTOMB]) GNULIB_C32SNRTOMBS=0; AC_SUBST([GNULIB_C32SNRTOMBS]) GNULIB_C32SRTOMBS=0; AC_SUBST([GNULIB_C32SRTOMBS]) + GNULIB_C32STOMBS=0; AC_SUBST([GNULIB_C32STOMBS]) GNULIB_C32TOB=0; AC_SUBST([GNULIB_C32TOB]) GNULIB_MBRTOC32=0; AC_SUBST([GNULIB_MBRTOC32]) GNULIB_MBSNRTOC32S=0; AC_SUBST([GNULIB_MBSNRTOC32S]) diff --git a/modules/c32stombs b/modules/c32stombs new file mode 100644 index 0000000..2741877 --- /dev/null +++ b/modules/c32stombs @@ -0,0 +1,25 @@ +Description: +c32stombs() function: convert 32-bit wide string to string. 
+ +Files: +lib/c32stombs.c + +Depends-on: +uchar +wchar +c32srtombs + +configure.ac: +gl_UCHAR_MODULE_INDICATOR([c32stombs]) + +Makefile.am: +lib_SOURCES += c32stombs.c + +Include: +<uchar.h> + +License: +LGPL + +Maintainer: +Bruno Haible diff --git a/modules/uchar b/modules/uchar index c7e86ed..595bac5 100644 --- a/modules/uchar +++ b/modules/uchar @@ -32,6 +32,7 @@ uchar.h: uchar.in.h $(top_builddir)/config.status $(CXXDEFS_H) -e 's/@''GNULIB_C32RTOMB''@/$(GNULIB_C32RTOMB)/g' \ -e 's/@''GNULIB_C32SNRTOMBS''@/$(GNULIB_C32SNRTOMBS)/g' \ -e 's/@''GNULIB_C32SRTOMBS''@/$(GNULIB_C32SRTOMBS)/g' \ + -e 's/@''GNULIB_C32STOMBS''@/$(GNULIB_C32STOMBS)/g' \ -e 's/@''GNULIB_C32TOB''@/$(GNULIB_C32TOB)/g' \ -e 's/@''GNULIB_MBRTOC32''@/$(GNULIB_MBRTOC32)/g' \ -e 's/@''GNULIB_MBSNRTOC32S''@/$(GNULIB_MBSNRTOC32S)/g' \ diff --git a/tests/test-uchar-c++.cc b/tests/test-uchar-c++.cc index ae97d9c..1855b7e 100644 --- a/tests/test-uchar-c++.cc +++ b/tests/test-uchar-c++.cc @@ -43,6 +43,11 @@ SIGNATURE_CHECK (GNULIB_NAMESPACE::c32srtombs, size_t, (char *, const char32_t **, size_t, mbstate_t *)); #endif +#if GNULIB_TEST_C32STOMBS +SIGNATURE_CHECK (GNULIB_NAMESPACE::c32stombs, size_t, + (char *, const char32_t *, size_t)); +#endif + #if GNULIB_TEST_C32TOB SIGNATURE_CHECK (GNULIB_NAMESPACE::c32tob, int, (wint_t)); #endif -- 2.7.4
From 611869be9f1083e53305446d90a2909fc89914ef Mon Sep 17 00:00:00 2001 From: Bruno Haible <br...@clisp.org> Date: Mon, 13 Jan 2020 00:17:47 +0100 Subject: [PATCH 2/2] c32stombs: Add tests. * tests/test-c32stombs.c: New file, based on tests/test-c32srtombs.c. * tests/test-c32stombs-1.sh: New file, based on tests/test-c32srtombs-1.sh. * tests/test-c32stombs-2.sh: New file, based on tests/test-c32srtombs-2.sh. * tests/test-c32stombs-3.sh: New file, based on tests/test-c32srtombs-3.sh. * tests/test-c32stombs-4.sh: New file, based on tests/test-c32srtombs-4.sh. * modules/c32stombs-tests: New file, based on modules/c32srtombs-tests. --- ChangeLog | 12 ++++ modules/c32stombs-tests | 32 +++++++++ tests/test-c32stombs-1.sh | 15 ++++ tests/test-c32stombs-2.sh | 15 ++++ tests/test-c32stombs-3.sh | 15 ++++ tests/test-c32stombs-4.sh | 15 ++++ tests/test-c32stombs.c | 177 ++++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 281 insertions(+) create mode 100644 modules/c32stombs-tests create mode 100755 tests/test-c32stombs-1.sh create mode 100755 tests/test-c32stombs-2.sh create mode 100755 tests/test-c32stombs-3.sh create mode 100755 tests/test-c32stombs-4.sh create mode 100644 tests/test-c32stombs.c diff --git a/ChangeLog b/ChangeLog index c2f4f24..245f2a7 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,17 @@ 2020-01-12 Bruno Haible <br...@clisp.org> + c32stombs: Add tests. + * tests/test-c32stombs.c: New file, based on tests/test-c32srtombs.c. + * tests/test-c32stombs-1.sh: New file, based on + tests/test-c32srtombs-1.sh. + * tests/test-c32stombs-2.sh: New file, based on + tests/test-c32srtombs-2.sh. + * tests/test-c32stombs-3.sh: New file, based on + tests/test-c32srtombs-3.sh. + * tests/test-c32stombs-4.sh: New file, based on + tests/test-c32srtombs-4.sh. + * modules/c32stombs-tests: New file, based on modules/c32srtombs-tests. + c32stombs: New module. * lib/uchar.in.h (c32stombs): New declaration. * lib/c32stombs.c: New file. 
diff --git a/modules/c32stombs-tests b/modules/c32stombs-tests new file mode 100644 index 0000000..c96de03 --- /dev/null +++ b/modules/c32stombs-tests @@ -0,0 +1,32 @@ +Files: +tests/test-c32stombs-1.sh +tests/test-c32stombs-2.sh +tests/test-c32stombs-3.sh +tests/test-c32stombs-4.sh +tests/test-c32stombs.c +tests/signature.h +tests/macros.h +m4/locale-fr.m4 +m4/locale-ja.m4 +m4/locale-zh.m4 +m4/codeset.m4 + +Depends-on: +setlocale +mbstoc32s + +configure.ac: +gt_LOCALE_FR +gt_LOCALE_FR_UTF8 +gt_LOCALE_JA +gt_LOCALE_ZH_CN + +Makefile.am: +TESTS += test-c32stombs-1.sh test-c32stombs-2.sh test-c32stombs-3.sh test-c32stombs-4.sh +TESTS_ENVIRONMENT += \ + LOCALE_FR='@LOCALE_FR@' \ + LOCALE_FR_UTF8='@LOCALE_FR_UTF8@' \ + LOCALE_JA='@LOCALE_JA@' \ + LOCALE_ZH_CN='@LOCALE_ZH_CN@' +check_PROGRAMS += test-c32stombs +test_c32stombs_LDADD = $(LDADD) $(LIB_SETLOCALE) diff --git a/tests/test-c32stombs-1.sh b/tests/test-c32stombs-1.sh new file mode 100755 index 0000000..ea63e9a --- /dev/null +++ b/tests/test-c32stombs-1.sh @@ -0,0 +1,15 @@ +#!/bin/sh + +# Test in an ISO-8859-1 or ISO-8859-15 locale. +: ${LOCALE_FR=fr_FR} +if test $LOCALE_FR = none; then + if test -f /usr/bin/localedef; then + echo "Skipping test: no traditional french locale is installed" + else + echo "Skipping test: no traditional french locale is supported" + fi + exit 77 +fi + +LC_ALL=$LOCALE_FR \ +${CHECKER} ./test-c32stombs${EXEEXT} 1 diff --git a/tests/test-c32stombs-2.sh b/tests/test-c32stombs-2.sh new file mode 100755 index 0000000..573a92f --- /dev/null +++ b/tests/test-c32stombs-2.sh @@ -0,0 +1,15 @@ +#!/bin/sh + +# Test whether a specific UTF-8 locale is installed. 
+: ${LOCALE_FR_UTF8=fr_FR.UTF-8} +if test $LOCALE_FR_UTF8 = none; then + if test -f /usr/bin/localedef; then + echo "Skipping test: no french Unicode locale is installed" + else + echo "Skipping test: no french Unicode locale is supported" + fi + exit 77 +fi + +LC_ALL=$LOCALE_FR_UTF8 \ +${CHECKER} ./test-c32stombs${EXEEXT} 2 diff --git a/tests/test-c32stombs-3.sh b/tests/test-c32stombs-3.sh new file mode 100755 index 0000000..c3c91e7 --- /dev/null +++ b/tests/test-c32stombs-3.sh @@ -0,0 +1,15 @@ +#!/bin/sh + +# Test whether a specific EUC-JP locale is installed. +: ${LOCALE_JA=ja_JP} +if test $LOCALE_JA = none; then + if test -f /usr/bin/localedef; then + echo "Skipping test: no traditional japanese locale is installed" + else + echo "Skipping test: no traditional japanese locale is supported" + fi + exit 77 +fi + +LC_ALL=$LOCALE_JA \ +${CHECKER} ./test-c32stombs${EXEEXT} 3 diff --git a/tests/test-c32stombs-4.sh b/tests/test-c32stombs-4.sh new file mode 100755 index 0000000..ae3b130 --- /dev/null +++ b/tests/test-c32stombs-4.sh @@ -0,0 +1,15 @@ +#!/bin/sh + +# Test whether a specific GB18030 locale is installed. +: ${LOCALE_ZH_CN=zh_CN.GB18030} +if test $LOCALE_ZH_CN = none; then + if test -f /usr/bin/localedef; then + echo "Skipping test: no transitional chinese locale is installed" + else + echo "Skipping test: no transitional chinese locale is supported" + fi + exit 77 +fi + +LC_ALL=$LOCALE_ZH_CN \ +${CHECKER} ./test-c32stombs${EXEEXT} 4 diff --git a/tests/test-c32stombs.c b/tests/test-c32stombs.c new file mode 100644 index 0000000..ebf542c --- /dev/null +++ b/tests/test-c32stombs.c @@ -0,0 +1,177 @@ +/* Test of conversion of 32-bit wide string to string. + Copyright (C) 2008-2020 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. */ + +/* Written by Bruno Haible <br...@clisp.org>, 2008. */ + +#include <config.h> + +#include <uchar.h> + +#include "signature.h" +SIGNATURE_CHECK (c32stombs, size_t, + (char *, const char32_t *, size_t)); + +#include <locale.h> +#include <stdlib.h> +#include <string.h> + +#include "macros.h" + +int +main (int argc, char *argv[]) +{ + /* configure should already have checked that the locale is supported. */ + if (setlocale (LC_ALL, "") == NULL) + return 1; + + if (argc > 1) + { + char32_t input[10]; + size_t n; + #define BUFSIZE 20 + char buf[BUFSIZE]; + size_t ret; + + { + size_t i; + for (i = 0; i < BUFSIZE; i++) + buf[i] = '_'; + } + + switch (argv[1][0]) + { + case '1': + /* Locale encoding is ISO-8859-1 or ISO-8859-15. */ + { + const char original[] = "B\374\337er"; /* "Büßer" */ + + ret = mbstoc32s (input, original, 10); + ASSERT (ret == 5); + + for (n = 0; n < 10; n++) + { + ret = c32stombs (NULL, input, n); + ASSERT (ret == 5); + + ret = c32stombs (buf, input, n); + ASSERT (ret == (n <= 5 ? n : 5)); + ASSERT (memcmp (buf, original, ret) == 0); + if (n > 5) + ASSERT (buf[ret] == '\0'); + ASSERT (buf[ret + (n > 5) + 0] == '_'); + ASSERT (buf[ret + (n > 5) + 1] == '_'); + ASSERT (buf[ret + (n > 5) + 2] == '_'); + } + } + break; + + case '2': + /* Locale encoding is UTF-8. */ + { + const char original[] = "s\303\274\303\237\360\237\230\213!"; /* "süß😋!" */ + + ret = mbstoc32s (input, original, 10); + ASSERT (ret == 5); + + for (n = 0; n < 15; n++) + { + ret = c32stombs (NULL, input, n); + ASSERT (ret == 10); + + ret = c32stombs (buf, input, n); + ASSERT (ret == (n < 1 ? 
n : + n < 3 ? 1 : + n < 5 ? 3 : + n < 9 ? 5 : + n <= 10 ? n : 10)); + ASSERT (memcmp (buf, original, ret) == 0); + if (n > 10) + ASSERT (buf[ret] == '\0'); + ASSERT (buf[ret + (n > 10) + 0] == '_'); + ASSERT (buf[ret + (n > 10) + 1] == '_'); + ASSERT (buf[ret + (n > 10) + 2] == '_'); + } + } + break; + + case '3': + /* Locale encoding is EUC-JP. */ + { + const char original[] = "<\306\374\313\334\270\354>"; /* "<日本語>" */ + + ret = mbstoc32s (input, original, 10); + ASSERT (ret == 5); + + for (n = 0; n < 10; n++) + { + ret = c32stombs (NULL, input, n); + ASSERT (ret == 8); + + ret = c32stombs (buf, input, n); + ASSERT (ret == (n < 1 ? n : + n < 3 ? 1 : + n < 5 ? 3 : + n < 7 ? 5 : + n <= 8 ? n : 8)); + ASSERT (memcmp (buf, original, ret) == 0); + if (n > 8) + ASSERT (buf[ret] == '\0'); + ASSERT (buf[ret + (n > 8) + 0] == '_'); + ASSERT (buf[ret + (n > 8) + 1] == '_'); + ASSERT (buf[ret + (n > 8) + 2] == '_'); + } + } + break; + + + case '4': + /* Locale encoding is GB18030. */ + { + const char original[] = "s\250\271\201\060\211\070\224\071\375\067!"; /* "süß😋!" */ + + ret = mbstoc32s (input, original, 10); + ASSERT (ret == 5); + + for (n = 0; n < 15; n++) + { + ret = c32stombs (NULL, input, n); + ASSERT (ret == 12); + + ret = c32stombs (buf, input, n); + ASSERT (ret == (n < 1 ? n : + n < 3 ? 1 : + n < 7 ? 3 : + n < 11 ? 7 : + n <= 12 ? n : 12)); + ASSERT (memcmp (buf, original, ret) == 0); + if (n > 12) + ASSERT (buf[ret] == '\0'); + ASSERT (buf[ret + (n > 12) + 0] == '_'); + ASSERT (buf[ret + (n > 12) + 1] == '_'); + ASSERT (buf[ret + (n > 12) + 2] == '_'); + } + } + break; + + default: + return 1; + } + + return 0; + } + + return 1; +} -- 2.7.4