I am still working on making vasnprintf and u*_vasnprintf able to handle arguments with more than 2^31 elements. These two patches cover wide strings and the %ls directive.
2024-06-19 Bruno Haible <br...@clisp.org> vasnprintf, u*-asnprintf tests: Add test of huge %ls arguments. * tests/test-vasnprintf-big.c: Include <wchar.h>. (main): Add tests for wide string arguments with > 2^31 wide characters. * tests/unistdio/test-u8-asnprintf-big.c: Include <wchar.h>. (main): Add tests for wide string arguments with > 2^31 wide characters. * tests/unistdio/test-ulc-asnprintf-big.c: Include <wchar.h>. (main): Add tests for wide string arguments with > 2^31 wide characters. vasnprintf, u*-vasnprintf: Support huge wide string arguments. * lib/vasnprintf.c: (VASNPRINTF): In 64-bit builds, handle the %ls directive ourselves. (local_wcslen, local_wcrtomb): Adjust #if condition.
>From 4f25160078b665751a4655b13550e8431ee9381d Mon Sep 17 00:00:00 2001 From: Bruno Haible <br...@clisp.org> Date: Thu, 20 Jun 2024 03:19:15 +0200 Subject: [PATCH 1/2] vasnprintf, u*-vasnprintf: Support huge wide string arguments. * lib/vasnprintf.c: (VASNPRINTF): In 64-bit builds, handle the %ls directive ourselves. (local_wcslen, local_wcrtomb): Adjust #if condition. --- ChangeLog | 7 +++++++ lib/vasnprintf.c | 8 ++++---- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/ChangeLog b/ChangeLog index 5b465a7eb7..4b9485f6c7 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +2024-06-19 Bruno Haible <br...@clisp.org> + + vasnprintf, u*-vasnprintf: Support huge wide string arguments. + * lib/vasnprintf.c: (VASNPRINTF): In 64-bit builds, handle the %ls + directive ourselves. + (local_wcslen, local_wcrtomb): Adjust #if condition. + 2024-06-19 Bruno Haible <br...@clisp.org> vasnwprintf tests: Add test of %s directive with large arguments. diff --git a/lib/vasnprintf.c b/lib/vasnprintf.c index 1178822df8..1036f98498 100644 --- a/lib/vasnprintf.c +++ b/lib/vasnprintf.c @@ -248,7 +248,7 @@ local_strnlen (const char *string, size_t maxlen) # endif #endif -#if ((!USE_SNPRINTF || WIDE_CHAR_VERSION || !HAVE_SNPRINTF_RETVAL_C99 || USE_MSVC__SNPRINTF || NEED_WPRINTF_DIRECTIVE_LC) && WIDE_CHAR_VERSION) || ((!USE_SNPRINTF || !HAVE_SNPRINTF_RETVAL_C99 || USE_MSVC__SNPRINTF || NEED_PRINTF_DIRECTIVE_LS) && !WIDE_CHAR_VERSION && DCHAR_IS_TCHAR) +#if ((!USE_SNPRINTF || WIDE_CHAR_VERSION || !HAVE_SNPRINTF_RETVAL_C99 || USE_MSVC__SNPRINTF || NEED_WPRINTF_DIRECTIVE_LC) && WIDE_CHAR_VERSION) || ((!USE_SNPRINTF || (PTRDIFF_MAX > INT_MAX) || !HAVE_SNPRINTF_RETVAL_C99 || USE_MSVC__SNPRINTF || NEED_PRINTF_DIRECTIVE_LS) && !WIDE_CHAR_VERSION && DCHAR_IS_TCHAR) # if HAVE_WCSLEN # define local_wcslen wcslen # else @@ -290,7 +290,7 @@ local_wcsnlen (const wchar_t *s, size_t maxlen) # endif #endif -#if ((!USE_SNPRINTF || !HAVE_SNPRINTF_RETVAL_C99 || USE_MSVC__SNPRINTF || 
NEED_PRINTF_DIRECTIVE_LS || ENABLE_WCHAR_FALLBACK) || ((NEED_PRINTF_DIRECTIVE_LC || ENABLE_WCHAR_FALLBACK) && HAVE_WINT_T)) && !WIDE_CHAR_VERSION +#if ((!USE_SNPRINTF || (PTRDIFF_MAX > INT_MAX) || !HAVE_SNPRINTF_RETVAL_C99 || USE_MSVC__SNPRINTF || NEED_PRINTF_DIRECTIVE_LS || ENABLE_WCHAR_FALLBACK) || ((NEED_PRINTF_DIRECTIVE_LC || ENABLE_WCHAR_FALLBACK) && HAVE_WINT_T)) && !WIDE_CHAR_VERSION # if ENABLE_WCHAR_FALLBACK static size_t wctomb_fallback (char *s, wchar_t wc) @@ -3138,7 +3138,7 @@ VASNPRINTF (DCHAR_T *resultbuf, size_t *lengthp, } } #endif -#if !USE_SNPRINTF || WIDE_CHAR_VERSION || !HAVE_SNPRINTF_RETVAL_C99 || USE_MSVC__SNPRINTF || NEED_PRINTF_DIRECTIVE_LS || ENABLE_WCHAR_FALLBACK +#if WIDE_CHAR_VERSION || !USE_SNPRINTF || (PTRDIFF_MAX > INT_MAX) || !HAVE_SNPRINTF_RETVAL_C99 || USE_MSVC__SNPRINTF || NEED_PRINTF_DIRECTIVE_LS || ENABLE_WCHAR_FALLBACK else if (dp->conversion == 's' # if WIDE_CHAR_VERSION && a.arg[dp->arg_index].type != TYPE_WIDE_STRING @@ -7268,7 +7268,7 @@ VASNPRINTF (DCHAR_T *resultbuf, size_t *lengthp, errno = ENOMEM; goto fail_with_errno; -#if ENABLE_UNISTDIO || (!USE_SNPRINTF || WIDE_CHAR_VERSION || !HAVE_SNPRINTF_RETVAL_C99 || USE_MSVC__SNPRINTF || NEED_PRINTF_DIRECTIVE_LS || ENABLE_WCHAR_FALLBACK) || ((NEED_PRINTF_DIRECTIVE_LC || ENABLE_WCHAR_FALLBACK) && HAVE_WINT_T && !WIDE_CHAR_VERSION) || (NEED_WPRINTF_DIRECTIVE_C && WIDE_CHAR_VERSION) +#if ENABLE_UNISTDIO || (WIDE_CHAR_VERSION || !USE_SNPRINTF || (PTRDIFF_MAX > INT_MAX) || !HAVE_SNPRINTF_RETVAL_C99 || USE_MSVC__SNPRINTF || NEED_PRINTF_DIRECTIVE_LS || ENABLE_WCHAR_FALLBACK) || ((NEED_PRINTF_DIRECTIVE_LC || ENABLE_WCHAR_FALLBACK) && HAVE_WINT_T && !WIDE_CHAR_VERSION) || (NEED_WPRINTF_DIRECTIVE_C && WIDE_CHAR_VERSION) fail_with_EILSEQ: errno = EILSEQ; goto fail_with_errno; -- 2.34.1
>From 572d2511a00bb6a608517c4975ee7ad693218c61 Mon Sep 17 00:00:00 2001 From: Bruno Haible <br...@clisp.org> Date: Thu, 20 Jun 2024 03:24:18 +0200 Subject: [PATCH 2/2] vasnprintf, u*-asnprintf tests: Add test of huge %ls arguments. * tests/test-vasnprintf-big.c: Include <wchar.h>. (main): Add tests for wide string arguments with > 2^31 wide characters. * tests/unistdio/test-u8-asnprintf-big.c: Include <wchar.h>. (main): Add tests for wide string arguments with > 2^31 wide characters. * tests/unistdio/test-ulc-asnprintf-big.c: Include <wchar.h>. (main): Add tests for wide string arguments with > 2^31 wide characters. --- ChangeLog | 8 +++ tests/test-vasnprintf-big.c | 92 +++++++++++++++++++++--- tests/unistdio/test-u8-asnprintf-big.c | 92 +++++++++++++++++++++--- tests/unistdio/test-ulc-asnprintf-big.c | 93 ++++++++++++++++++++++--- 4 files changed, 261 insertions(+), 24 deletions(-) diff --git a/ChangeLog b/ChangeLog index 4b9485f6c7..1c156b791e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,13 @@ 2024-06-19 Bruno Haible <br...@clisp.org> + vasnprintf, u*-asnprintf tests: Add test of huge %ls arguments. + * tests/test-vasnprintf-big.c: Include <wchar.h>. + (main): Add tests for wide string arguments with > 2^31 wide characters. + * tests/unistdio/test-u8-asnprintf-big.c: Include <wchar.h>. + (main): Add tests for wide string arguments with > 2^31 wide characters. + * tests/unistdio/test-ulc-asnprintf-big.c: Include <wchar.h>. + (main): Add tests for wide string arguments with > 2^31 wide characters. + vasnprintf, u*-vasnprintf: Support huge wide string arguments. * lib/vasnprintf.c: (VASNPRINTF): In 64-bit builds, handle the %ls directive ourselves. 
diff --git a/tests/test-vasnprintf-big.c b/tests/test-vasnprintf-big.c index 65b4dc3f7c..eae1d2704d 100644 --- a/tests/test-vasnprintf-big.c +++ b/tests/test-vasnprintf-big.c @@ -31,6 +31,7 @@ #include <errno.h> #include <stdio.h> #include <string.h> +#include <wchar.h> #if HAVE_SETRLIMIT # include <sys/types.h> @@ -54,14 +55,16 @@ main () rl.rlim_cur = rl.rlim_max = 0; setrlimit (RLIMIT_CORE, &rl); # endif - /* The test below needs about 10 GiB of memory: + /* The test below needs about 25 GiB of memory: $ time /usr/bin/time -f "Max RSS: %M KiB" ./test-vasnprintf-big - Max RSS: 10487464 KiB - real 0m34,417s - user 0m26,175s - sys 0m8,240s - 5 GiB for the inputs and up to 5 GiB for temporary output buffers. */ - double needed = 10.0 * 1024 * 1024 * 1024; + Max RSS: 26216128 KiB + real 3m54,169s + user 3m33,309s + sys 0m20,819s + 5 GiB for the inputs in the %s tests, + or 20 GiB for the inputs in the %ls tests, + and up to 5 GiB for temporary output buffers. */ + double needed = 25.0 * 1024 * 1024 * 1024; double avail = physmem_claimable (1.0); printf ("memory needed = %g MiB, available = %g MiB\n", needed / 1024 / 1024, avail / 1024 / 1024); @@ -69,7 +72,7 @@ main () { /* Note: The malloc() calls can fail, due to ulimit of RLIMIT_DATA. For example, on OpenBSD 7.5, the soft limit is 1.0 GiB or 1.5 GiB, - and you need "ulimit -d 15728640". */ + and you need "ulimit -d 27262976". */ /* Verify that asnprintf() can return a string of size > 4 GiB. */ { @@ -189,6 +192,79 @@ main () } } } + + /* Verify that asnprintf() can take a wide string with an element count + > 2^31, < 2^32 as argument. 
*/ + { + size_t n1 = 3 * (size_t) (INT_MAX / 2) + 10; + wchar_t *s1; + + s1 = (wchar_t *) malloc ((n1 + 1) * sizeof (wchar_t)); + if (s1 != NULL) + { + wmemset (s1, L'a', n1); + s1[n1] = L'\0'; + + size_t len; + char *s = asnprintf (NULL, &len, "x%lsy", s1); + if (s == NULL) + { + ASSERT (errno == ENOMEM); + skipped = true; + } + else + { + ASSERT (strlen (s) == len); + ASSERT (len == n1 + 2); + size_t i; + for (i = 0; i <= len; i++) + s[i] = (i == 0 ? 'x' : + i <= n1 ? 'a' : + i == n1 + 1 ? 'y' : + '\0'); + free (s); + } + free (s1); + } + } + + /* Verify that asnprintf() can take a wide string with an element count + > 2^32 as argument. */ + { + size_t n1 = 5 * (size_t) (INT_MAX / 2) + 10; + if (n1 > (size_t) INT_MAX) + { + wchar_t *s1; + + s1 = (wchar_t *) malloc ((n1 + 1) * sizeof (wchar_t)); + if (s1 != NULL) + { + wmemset (s1, L'a', n1); + s1[n1] = L'\0'; + + size_t len; + char *s = asnprintf (NULL, &len, "x%lsy", s1); + if (s == NULL) + { + ASSERT (errno == ENOMEM); + skipped = true; + } + else + { + ASSERT (strlen (s) == len); + ASSERT (len == n1 + 2); + size_t i; + for (i = 0; i <= len; i++) + s[i] = (i == 0 ? 'x' : + i <= n1 ? 'a' : + i == n1 + 1 ? 
'y' : + '\0'); + free (s); + } + free (s1); + } + } + } } else skipped = true; diff --git a/tests/unistdio/test-u8-asnprintf-big.c b/tests/unistdio/test-u8-asnprintf-big.c index 6b7cced5ce..1a5b46d098 100644 --- a/tests/unistdio/test-u8-asnprintf-big.c +++ b/tests/unistdio/test-u8-asnprintf-big.c @@ -32,6 +32,7 @@ #include <stdio.h> #include <string.h> #include <unistr.h> +#include <wchar.h> #if HAVE_SETRLIMIT # include <sys/types.h> @@ -55,14 +56,16 @@ main () rl.rlim_cur = rl.rlim_max = 0; setrlimit (RLIMIT_CORE, &rl); # endif - /* The test below needs about 15 GiB of memory: + /* The test below needs about 30 GiB of memory: $ time /usr/bin/time -f "Max RSS: %M KiB" ./test-u8-asnprintf-big - Max RSS: 15730356 KiB - real 0m58,011s - user 0m46,403s - sys 0m11,604s - 5 GiB for the inputs and up to 10 GiB for temporary output buffers. */ - double needed = 15.0 * 1024 * 1024 * 1024; + Max RSS: 31459148 KiB + real 6m57,851s + user 6m28,413s + sys 0m29,382s + 5 GiB for the inputs in the %s tests, + or 20 GiB for the inputs in the %ls tests, + and up to 10 GiB for temporary output buffers. */ + double needed = 30.0 * 1024 * 1024 * 1024; double avail = physmem_claimable (1.0); printf ("memory needed = %g MiB, available = %g MiB\n", needed / 1024 / 1024, avail / 1024 / 1024); @@ -70,7 +73,7 @@ main () { /* Note: The malloc() calls can fail, due to ulimit of RLIMIT_DATA. For example, on OpenBSD 7.5, the soft limit is 1.0 GiB or 1.5 GiB, - and you need "ulimit -d 15728640". */ + and you need "ulimit -d 32505856". */ /* Verify that u8_asnprintf() can return a string of size > 4 GiB. */ { @@ -190,6 +193,79 @@ main () } } } + + /* Verify that u8_asnprintf() can take a wide string with an element count + > 2^31, < 2^32 as argument. 
*/ + { + size_t n1 = 3 * (size_t) (INT_MAX / 2) + 10; + wchar_t *s1; + + s1 = (wchar_t *) malloc ((n1 + 1) * sizeof (wchar_t)); + if (s1 != NULL) + { + wmemset (s1, L'a', n1); + s1[n1] = L'\0'; + + size_t len; + uint8_t *s = u8_asnprintf (NULL, &len, "x%lsy", s1); + if (s == NULL) + { + ASSERT (errno == ENOMEM); + skipped = true; + } + else + { + ASSERT (u8_strlen (s) == len); + ASSERT (len == n1 + 2); + size_t i; + for (i = 0; i <= len; i++) + s[i] = (i == 0 ? 'x' : + i <= n1 ? 'a' : + i == n1 + 1 ? 'y' : + '\0'); + free (s); + } + free (s1); + } + } + + /* Verify that u8_asnprintf() can take a wide string with an element count + > 2^32 as argument. */ + { + size_t n1 = 5 * (size_t) (INT_MAX / 2) + 10; + if (n1 > (size_t) INT_MAX) + { + wchar_t *s1; + + s1 = (wchar_t *) malloc ((n1 + 1) * sizeof (wchar_t)); + if (s1 != NULL) + { + wmemset (s1, L'a', n1); + s1[n1] = L'\0'; + + size_t len; + uint8_t *s = u8_asnprintf (NULL, &len, "x%lsy", s1); + if (s == NULL) + { + ASSERT (errno == ENOMEM); + skipped = true; + } + else + { + ASSERT (u8_strlen (s) == len); + ASSERT (len == n1 + 2); + size_t i; + for (i = 0; i <= len; i++) + s[i] = (i == 0 ? 'x' : + i <= n1 ? 'a' : + i == n1 + 1 ? 
'y' : + '\0'); + free (s); + } + free (s1); + } + } + } } else skipped = true; diff --git a/tests/unistdio/test-ulc-asnprintf-big.c b/tests/unistdio/test-ulc-asnprintf-big.c index 8ef733d962..fcb8799684 100644 --- a/tests/unistdio/test-ulc-asnprintf-big.c +++ b/tests/unistdio/test-ulc-asnprintf-big.c @@ -31,6 +31,7 @@ #include <errno.h> #include <stdio.h> #include <string.h> +#include <wchar.h> #if HAVE_SETRLIMIT # include <sys/types.h> @@ -54,14 +55,17 @@ main () rl.rlim_cur = rl.rlim_max = 0; setrlimit (RLIMIT_CORE, &rl); # endif - /* The test below needs about 15 GiB of memory: + /* The test below needs about 25 GiB of memory: $ time /usr/bin/time -f "Max RSS: %M KiB" ./test-ulc-asnprintf-big - Max RSS: 15730376 KiB - real 1m13,702s - user 1m0,184s - sys 0m13,512s - 5 GiB for the inputs and up to 10 GiB for temporary output buffers. */ - double needed = 15.0 * 1024 * 1024 * 1024; + Max RSS: 26216192 KiB + real 4m34,682s + user 4m8,592s + sys 0m26,063s + - In the %s tests: + 5 GiB for the inputs and up to 10 GiB for temporary output buffers. + - In the %ls tests: + 20 GiB for the inputs and up to 5 GiB for temporary output buffers. */ + double needed = 25.0 * 1024 * 1024 * 1024; double avail = physmem_claimable (1.0); printf ("memory needed = %g MiB, available = %g MiB\n", needed / 1024 / 1024, avail / 1024 / 1024); @@ -69,7 +73,7 @@ main () { /* Note: The malloc() calls can fail, due to ulimit of RLIMIT_DATA. For example, on OpenBSD 7.5, the soft limit is 1.0 GiB or 1.5 GiB, - and you need "ulimit -d 15728640". */ + and you need "ulimit -d 27262976". */ /* Verify that ulc_asnprintf() can return a string of size > 4 GiB. */ { @@ -243,6 +247,79 @@ main () } } } + + /* Verify that ulc_asnprintf() can take a wide string with an element count + > 2^31, < 2^32 as argument. 
*/ + { + size_t n1 = 3 * (size_t) (INT_MAX / 2) + 10; + wchar_t *s1; + + s1 = (wchar_t *) malloc ((n1 + 1) * sizeof (wchar_t)); + if (s1 != NULL) + { + wmemset (s1, L'a', n1); + s1[n1] = L'\0'; + + size_t len; + char *s = ulc_asnprintf (NULL, &len, "x%lsy", s1); + if (s == NULL) + { + ASSERT (errno == ENOMEM); + skipped = true; + } + else + { + ASSERT (strlen (s) == len); + ASSERT (len == n1 + 2); + size_t i; + for (i = 0; i <= len; i++) + s[i] = (i == 0 ? 'x' : + i <= n1 ? 'a' : + i == n1 + 1 ? 'y' : + '\0'); + free (s); + } + free (s1); + } + } + + /* Verify that ulc_asnprintf() can take a wide string with an element count + > 2^32 as argument. */ + { + size_t n1 = 5 * (size_t) (INT_MAX / 2) + 10; + if (n1 > (size_t) INT_MAX) + { + wchar_t *s1; + + s1 = (wchar_t *) malloc ((n1 + 1) * sizeof (wchar_t)); + if (s1 != NULL) + { + wmemset (s1, L'a', n1); + s1[n1] = L'\0'; + + size_t len; + char *s = ulc_asnprintf (NULL, &len, "x%lsy", s1); + if (s == NULL) + { + ASSERT (errno == ENOMEM); + skipped = true; + } + else + { + ASSERT (strlen (s) == len); + ASSERT (len == n1 + 2); + size_t i; + for (i = 0; i <= len; i++) + s[i] = (i == 0 ? 'x' : + i <= n1 ? 'a' : + i == n1 + 1 ? 'y' : + '\0'); + free (s); + } + free (s1); + } + } + } } else skipped = true; -- 2.34.1