Yesterday I did: > (VASNPRINTF): In this case, implement %ls and %lc directly. Adjust a > couple of #if conditions. For the conversion from TCHAR_T[] to > DCHAR_T[], use mbsrtowcs.
Oops, this was buggy: - The %lc implementation needs to ignore the precision. - The conversion from TCHAR_T[] to DCHAR_T[] must not stop at NUL bytes. It needs to convert the entire output of snprintf. The new unit tests for the %c and %lc directives trigger these bugs. Fixed as follows. 2023-03-21 Bruno Haible <br...@clisp.org> vasnwprintf: Fix for older platforms without swprintf. * lib/vasnprintf.c (VASNPRINTF): In the %lc handling, ignore the precision. Convert the snprintf result to a wchar_t[] not by mbsrtowcs, but by a loop that does not stop at NUL characters. * tests/test-vasnwprintf-posix.c (test_function): Add more tests for the %c and %lc directives. * modules/vasnwprintf (Depends-on): Add mbrtowc. Remove mbsrtowcs. diff --git a/lib/vasnprintf.c b/lib/vasnprintf.c index 0b349f4b55..bd13002e98 100644 --- a/lib/vasnprintf.c +++ b/lib/vasnprintf.c @@ -2432,8 +2432,6 @@ VASNPRINTF (DCHAR_T *resultbuf, size_t *lengthp, Instead, just copy the argument wchar_t[] to the result. */ int flags = dp->flags; size_t width; - int has_precision; - size_t precision; width = 0; if (dp->width_start != dp->width_end) @@ -2464,59 +2462,62 @@ VASNPRINTF (DCHAR_T *resultbuf, size_t *lengthp, } } - has_precision = 0; - precision = 6; - if (dp->precision_start != dp->precision_end) - { - if (dp->precision_arg_index != ARG_NONE) - { - int arg; - - if (!(a.arg[dp->precision_arg_index].type == TYPE_INT)) - abort (); - arg = a.arg[dp->precision_arg_index].a.a_int; - /* "A negative precision is taken as if the precision - were omitted." */ - if (arg >= 0) - { - precision = arg; - has_precision = 1; - } - } - else - { - const FCHAR_T *digitp = dp->precision_start + 1; - - precision = 0; - while (digitp != dp->precision_end) - precision = xsum (xtimes (precision, 10), *digitp++ - '0'); - has_precision = 1; - } - } - { - const wchar_t *arg; + const wchar_t *ls_arg; wchar_t lc_arg[1]; size_t characters; if (dp->conversion == 's') { - arg = a.arg[dp->arg_index].a.a_wide_string; + int has_precision; + size_t precision; + + has_precision = 0; + precision = 6; + if (dp->precision_start != dp->precision_end) + { + if (dp->precision_arg_index != ARG_NONE) + { + int arg; + + if (!(a.arg[dp->precision_arg_index].type == TYPE_INT)) + abort (); + arg = a.arg[dp->precision_arg_index].a.a_int; + /* "A negative precision is taken as if the precision + were omitted." */ + if (arg >= 0) + { + precision = arg; + has_precision = 1; + } + } + else + { + const FCHAR_T *digitp = dp->precision_start + 1; + + precision = 0; + while (digitp != dp->precision_end) + precision = xsum (xtimes (precision, 10), *digitp++ - '0'); + has_precision = 1; + } + } + + ls_arg = a.arg[dp->arg_index].a.a_wide_string; if (has_precision) { /* Use only at most PRECISION wide characters, from the left. */ - const wchar_t *arg_end; + const wchar_t *ls_arg_end; - arg_end = arg; + ls_arg_end = ls_arg; characters = 0; for (; precision > 0; precision--) { - if (*arg_end == 0) + if (*ls_arg_end == 0) /* Found the terminating null wide character. */ break; - arg_end++; + ls_arg_end++; characters++; } } @@ -2524,17 +2525,14 @@ VASNPRINTF (DCHAR_T *resultbuf, size_t *lengthp, { /* Use the entire string, and count the number of wide characters. */ - characters = local_wcslen (arg); + characters = local_wcslen (ls_arg); } } else /* dp->conversion == 'c' */ { lc_arg[0] = (wchar_t) a.arg[dp->arg_index].a.a_wide_char; - arg = lc_arg; - if (has_precision && precision == 0) - characters = 0; - else - characters = 1; + ls_arg = lc_arg; + characters = 1; } { @@ -2550,7 +2548,7 @@ VASNPRINTF (DCHAR_T *resultbuf, size_t *lengthp, if (characters > 0) { - DCHAR_CPY (result + length, arg, characters); + DCHAR_CPY (result + length, ls_arg, characters); length += characters; } @@ -5854,21 +5852,59 @@ VASNPRINTF (DCHAR_T *resultbuf, size_t *lengthp, tmpsrc = tmp; # endif # if WIDE_CHAR_VERSION - const TCHAR_T *tmpsrc2; + /* Convert tmpsrc[0..count-1] to a freshly allocated + wide character array. */ mbstate_t state; - tmpsrc2 = tmpsrc; memset (&state, '\0', sizeof (mbstate_t)); - tmpdst_len = mbsrtowcs (NULL, &tmpsrc2, 0, &state); - if (tmpdst_len == (size_t) -1) - goto fail_with_errno; + tmpdst_len = 0; + { + const TCHAR_T *src = tmpsrc; + size_t srclen = count; + + for (; srclen > 0; tmpdst_len++) + { + /* Parse the next multibyte character. */ + size_t ret = mbrtowc (NULL, src, srclen, &state); + if (ret == (size_t)(-2) || ret == (size_t)(-1)) + goto fail_with_EILSEQ; + if (ret == 0) + ret = 1; + src += ret; + srclen -= ret; + } + } + tmpdst = (wchar_t *) malloc ((tmpdst_len + 1) * sizeof (wchar_t)); if (tmpdst == NULL) goto out_of_memory; - tmpsrc2 = tmpsrc; + memset (&state, '\0', sizeof (mbstate_t)); - (void) mbsrtowcs (tmpdst, &tmpsrc2, tmpdst_len + 1, &state); + { + DCHAR_T *destptr = tmpdst; + size_t len = tmpdst_len; + const TCHAR_T *src = tmpsrc; + size_t srclen = count; + + for (; srclen > 0; destptr++, len--) + { + /* Parse the next multibyte character. */ + size_t ret = mbrtowc (destptr, src, srclen, &state); + if (ret == (size_t)(-2) || ret == (size_t)(-1)) + /* Should already have been caught in the first + loop, above. */ + abort (); + if (ret == 0) + ret = 1; + src += ret; + srclen -= ret; + } + /* By the way tmpdst_len was computed, len should now + be 0. */ + if (len != 0) + abort (); + } # else tmpdst = DCHAR_CONV_FROM_ENCODING (locale_charset (), diff --git a/modules/vasnwprintf b/modules/vasnwprintf index 109c39f1c5..91c4ca64ed 100644 --- a/modules/vasnwprintf +++ b/modules/vasnwprintf @@ -35,7 +35,7 @@ errno memchr assert-h wchar -mbsrtowcs +mbrtowc wmemcpy wmemset diff --git a/tests/test-vasnwprintf-posix.c b/tests/test-vasnwprintf-posix.c index c609a104ff..e53c6a33f3 100644 --- a/tests/test-vasnwprintf-posix.c +++ b/tests/test-vasnwprintf-posix.c @@ -3940,6 +3940,26 @@ test_function (wchar_t * (*my_asnwprintf) (wchar_t *, size_t *, const wchar_t *, free (result); } + { /* Precision is ignored. */ + size_t length; + wchar_t *result = + my_asnwprintf (NULL, &length, + L"%.0c %d", (unsigned char) 'x', 33, 44, 55); + ASSERT (wcscmp (result, L"x 33") == 0); + ASSERT (length == wcslen (result)); + free (result); + } + + { /* NUL character. */ + size_t length; + wchar_t *result = + my_asnwprintf (NULL, &length, + L"a%cz %d", '\0', 33, 44, 55); + ASSERT (wmemcmp (result, L"a\0z 33\0", 6 + 1) == 0); + ASSERT (length == 6); + free (result); + } + #if HAVE_WCHAR_T static wint_t L_x = (wchar_t) 'x'; @@ -3982,6 +4002,24 @@ test_function (wchar_t * (*my_asnwprintf) (wchar_t *, size_t *, const wchar_t *, ASSERT (length == wcslen (result)); free (result); } + + { /* Precision is ignored. */ + size_t length; + wchar_t *result = + my_asnwprintf (NULL, &length, L"%.0lc %d", L_x, 33, 44, 55); + ASSERT (wcscmp (result, L"x 33") == 0); + ASSERT (length == wcslen (result)); + free (result); + } + + { /* NUL character. */ + size_t length; + wchar_t *result = + my_asnwprintf (NULL, &length, L"a%lcz %d", (wint_t) L'\0', 33, 44, 55); + ASSERT (wmemcmp (result, L"a\0z 33\0", 6 + 1) == 0); + ASSERT (length == 6); + free (result); + } #endif /* Test the support of the 'b' conversion specifier for binary output of