These two patches add support for wide strings with more than INT_MAX wide characters to vasnwprintf.
Without the change in lib/vasnprintf.c, the underlying swprintf function is used, which always returns -1 for such arguments, and the logic in vasnprintf.c then increases the memory allocation exponentially (17 GB -> 34 GB -> 68 GB -> 137 GB, which finally fails). This patch set completes step 1 of <https://lists.gnu.org/archive/html/bug-gnulib/2024-04/msg00352.html>. 2024-06-20 Bruno Haible <br...@clisp.org> vasnwprintf tests: Add test of huge %ls arguments. * tests/test-vasnwprintf-big.c (main): Add tests for wide string arguments with > 2^31 wide characters. vasnwprintf: Support huge wide string arguments. * lib/vasnprintf.c (VASNPRINTF): In 64-bit builds, handle the %ls directive ourselves. (local_wcslen): Adjust #if condition.
>From 94d34cfb941880ba7112cefb196a4a0fc9f713de Mon Sep 17 00:00:00 2001 From: Bruno Haible <br...@clisp.org> Date: Thu, 20 Jun 2024 08:11:53 +0200 Subject: [PATCH 1/2] vasnwprintf: Support huge wide string arguments. * lib/vasnprintf.c: (VASNPRINTF): In 64-bit builds, handle the %ls directive ourselves. (local_wcslen): Adjust #if condition. --- ChangeLog | 7 +++++++ lib/vasnprintf.c | 4 ++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index c68af8c9e7..ea01cfe60d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +2024-06-20 Bruno Haible <br...@clisp.org> + + vasnwprintf: Support huge wide string arguments. + * lib/vasnprintf.c: (VASNPRINTF): In 64-bit builds, handle the %ls + directive ourselves. + (local_wcslen): Adjust #if condition. + 2024-06-20 Paul Eggert <egg...@cs.ucla.edu> sigsegv: avoid unlikely undefined behavior diff --git a/lib/vasnprintf.c b/lib/vasnprintf.c index 1036f98498..8efb9ebee2 100644 --- a/lib/vasnprintf.c +++ b/lib/vasnprintf.c @@ -248,7 +248,7 @@ local_strnlen (const char *string, size_t maxlen) # endif #endif -#if ((!USE_SNPRINTF || WIDE_CHAR_VERSION || !HAVE_SNPRINTF_RETVAL_C99 || USE_MSVC__SNPRINTF || NEED_WPRINTF_DIRECTIVE_LC) && WIDE_CHAR_VERSION) || ((!USE_SNPRINTF || (PTRDIFF_MAX > INT_MAX) || !HAVE_SNPRINTF_RETVAL_C99 || USE_MSVC__SNPRINTF || NEED_PRINTF_DIRECTIVE_LS) && !WIDE_CHAR_VERSION && DCHAR_IS_TCHAR) +#if ((!USE_SNPRINTF || WIDE_CHAR_VERSION || !HAVE_SNPRINTF_RETVAL_C99 || USE_MSVC__SNPRINTF || (PTRDIFF_MAX > INT_MAX) || !DCHAR_IS_TCHAR || NEED_WPRINTF_DIRECTIVE_LC) && WIDE_CHAR_VERSION) || ((!USE_SNPRINTF || (PTRDIFF_MAX > INT_MAX) || !HAVE_SNPRINTF_RETVAL_C99 || USE_MSVC__SNPRINTF || NEED_PRINTF_DIRECTIVE_LS) && !WIDE_CHAR_VERSION && DCHAR_IS_TCHAR) # if HAVE_WCSLEN # define local_wcslen wcslen # else @@ -2993,7 +2993,7 @@ VASNPRINTF (DCHAR_T *resultbuf, size_t *lengthp, } } #endif -#if WIDE_CHAR_VERSION && (!DCHAR_IS_TCHAR || NEED_WPRINTF_DIRECTIVE_LC) +#if WIDE_CHAR_VERSION 
&& ((PTRDIFF_MAX > INT_MAX) || !DCHAR_IS_TCHAR || NEED_WPRINTF_DIRECTIVE_LC) else if ((dp->conversion == 's' && a.arg[dp->arg_index].type == TYPE_WIDE_STRING) || (dp->conversion == 'c' -- 2.34.1
>From 45a1b5a72c097fc68b38638a3c56ef42c0f53d73 Mon Sep 17 00:00:00 2001 From: Bruno Haible <br...@clisp.org> Date: Thu, 20 Jun 2024 08:13:25 +0200 Subject: [PATCH 2/2] vasnwprintf tests: Add test of huge %ls arguments. * tests/test-vasnwprintf-big.c (main): Add tests for wide string arguments with > 2^31 wide characters. --- ChangeLog | 4 ++ tests/test-vasnwprintf-big.c | 91 ++++++++++++++++++++++++++++++++---- 2 files changed, 87 insertions(+), 8 deletions(-) diff --git a/ChangeLog b/ChangeLog index ea01cfe60d..5fea7e958b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,9 @@ 2024-06-20 Bruno Haible <br...@clisp.org> + vasnwprintf tests: Add test of huge %ls arguments. + * tests/test-vasnwprintf-big.c (main): Add tests for wide string + arguments with > 2^31 wide characters. + vasnwprintf: Support huge wide string arguments. * lib/vasnprintf.c: (VASNPRINTF): In 64-bit builds, handle the %ls directive ourselves. diff --git a/tests/test-vasnwprintf-big.c b/tests/test-vasnwprintf-big.c index 30f123f15f..1c4411efca 100644 --- a/tests/test-vasnwprintf-big.c +++ b/tests/test-vasnwprintf-big.c @@ -55,14 +55,16 @@ main () rl.rlim_cur = rl.rlim_max = 0; setrlimit (RLIMIT_CORE, &rl); # endif - /* The test below needs about 25 GiB of memory: + /* The test below needs about 40 GiB of memory: $ time /usr/bin/time -f "Max RSS: %M KiB" ./test-vasnwprintf-big - Max RSS: 26216204 KiB - real 4m22,540s - user 4m6,322s - sys 0m16,203s - 5 GiB for the inputs and up to 20 GiB for temporary output buffers. */ - double needed = 25.0 * 1024 * 1024 * 1024; + Max RSS: 41944784 KiB + real 5m8,508s + user 4m38,035s + sys 0m30,456s + 5 GiB for the inputs in the %s tests, + or 20 GiB for the inputs in the %ls tests, + and up to 20 GiB for temporary output buffers. 
*/ + double needed = 40.0 * 1024 * 1024 * 1024; double avail = physmem_claimable (1.0); printf ("memory needed = %g MiB, available = %g MiB\n", needed / 1024 / 1024, avail / 1024 / 1024); @@ -70,7 +72,7 @@ main () { /* Note: The malloc() calls can fail, due to ulimit of RLIMIT_DATA. For example, on OpenBSD 7.5, the soft limit is 1.0 GiB or 1.5 GiB, - and you need "ulimit -d 26214400". */ + and you need "ulimit -d 42991616". */ /* Verify that asnwprintf() can return a string of size > 4 GiB. */ { @@ -190,6 +192,79 @@ main () } } } + + /* Verify that asnwprintf() can take a wide string with an element count + > 2^31, < 2^32 as argument. */ + { + size_t n1 = 3 * (size_t) (INT_MAX / 2) + 10; + wchar_t *s1; + + s1 = (wchar_t *) malloc ((n1 + 1) * sizeof (wchar_t)); + if (s1 != NULL) + { + wmemset (s1, L'a', n1); + s1[n1] = L'\0'; + + size_t len; + wchar_t *s = asnwprintf (NULL, &len, L"x%lsy", s1); + if (s == NULL) + { + ASSERT (errno == ENOMEM); + skipped = true; + } + else + { + ASSERT (wcslen (s) == len); + ASSERT (len == n1 + 2); + size_t i; + for (i = 0; i <= len; i++) + s[i] = (i == 0 ? 'x' : + i <= n1 ? 'a' : + i == n1 + 1 ? 'y' : + '\0'); + free (s); + } + free (s1); + } + } + + /* Verify that asnwprintf() can take a wide string with an element count + > 2^32 as argument. */ + { + size_t n1 = 5 * (size_t) (INT_MAX / 2) + 10; + if (n1 > (size_t) INT_MAX) + { + wchar_t *s1; + + s1 = (wchar_t *) malloc ((n1 + 1) * sizeof (wchar_t)); + if (s1 != NULL) + { + wmemset (s1, L'a', n1); + s1[n1] = L'\0'; + + size_t len; + wchar_t *s = asnwprintf (NULL, &len, L"x%lsy", s1); + if (s == NULL) + { + ASSERT (errno == ENOMEM); + skipped = true; + } + else + { + ASSERT (wcslen (s) == len); + ASSERT (len == n1 + 2); + size_t i; + for (i = 0; i <= len; i++) + s[i] = (i == 0 ? 'x' : + i <= n1 ? 'a' : + i == n1 + 1 ? 'y' : + '\0'); + free (s); + } + free (s1); + } + } + } } else skipped = true; -- 2.34.1