This patch corrects handling of UTF-32LE and UTF32-BE in __unicode::__literal_encoding_is_unicode<_CharT>, so they are recognized as unicode and functions produces correct result for wchar_t.
Use `__unicode::__field_width` to compute the estimated witdh of the charcter for unicode wide encoding. PR libstdc++-v3/119593 libstdc++-v3/ChangeLog: * include/bits/unicode.h (__unicode::__literal_encoding_is_unicode<_CharT>): Corrected handing for UTF-16 and UTF-32 with "LE" or "BE" suffix. * include/std/format (__formatter_str::_S_character_width): Define. (__formatter_str::_S_character_width): Updated passed char length. * testsuite/std/format/functions/format.cc: Test for wchar_t. --- Testing on x86_64-linux. OK for trunk? I believe we should backport it, given that all wchar_t uses are impacted. libstdc++-v3/include/bits/unicode.h | 2 ++ libstdc++-v3/include/std/format | 15 ++++++++++++++- .../testsuite/std/format/functions/format.cc | 8 ++++++-- 3 files changed, 22 insertions(+), 3 deletions(-) diff --git a/libstdc++-v3/include/bits/unicode.h b/libstdc++-v3/include/bits/unicode.h index 24b1ac3d53d..99d972eccff 100644 --- a/libstdc++-v3/include/bits/unicode.h +++ b/libstdc++-v3/include/bits/unicode.h @@ -1039,6 +1039,8 @@ inline namespace __v16_0_0 string_view __s(__enc); if (__s.ends_with("//")) __s.remove_suffix(2); + if (__s.ends_with("LE") || __s.ends_with("BE")) + __s.remove_suffix(2); return __s == "16" || __s == "32"; } } diff --git a/libstdc++-v3/include/std/format b/libstdc++-v3/include/std/format index c3327e1d384..603facc51de 100644 --- a/libstdc++-v3/include/std/format +++ b/libstdc++-v3/include/std/format @@ -1277,12 +1277,25 @@ namespace __format _M_spec); } + static size_t + _S_character_width(_CharT __c) + { + using __unicode::__literal_encoding_is_unicode; + // N.B. single byte cannot encode charcter of width greater than 1 + if (sizeof(_CharT) > 1u && __literal_encoding_is_unicode<_CharT>()) + return __unicode::__field_width(__c); + else + return 1u; + } + template<typename _Out> typename basic_format_context<_Out, _CharT>::iterator _M_format_character(_CharT __c, basic_format_context<_Out, _CharT>& __fc) const { - return __format::__write_padded_as_spec({&__c, 1u}, 1, __fc, _M_spec); + return __format::__write_padded_as_spec({&__c, 1u}, + _S_character_width(__c), + __fc, _M_spec); } template<typename _Int> diff --git a/libstdc++-v3/testsuite/std/format/functions/format.cc b/libstdc++-v3/testsuite/std/format/functions/format.cc index 7fc42017045..d8dbf463413 100644 --- a/libstdc++-v3/testsuite/std/format/functions/format.cc +++ b/libstdc++-v3/testsuite/std/format/functions/format.cc @@ -501,9 +501,14 @@ test_unicode() { // Similar to sC example in test_std_examples, but not from the standard. // Verify that the character "🤡" has estimated field width 2, - // rather than estimated field width equal to strlen("🤡"), which would be 4. + // rather than estimated field width equal to strlen("🤡"), which would be 4, + // or just width 1 for single character. std::string sC = std::format("{:*<3}", "🤡"); VERIFY( sC == "🤡*" ); + std::wstring wsC = std::format(L"{:*<3}", L"🤡"); + VERIFY( wsC == L"🤡*" ); + wsC = std::format(L"{:*<3}", L'🤡'); + VERIFY( wsC == L"🤡*" ); // Verify that "£" has estimated field width 1, not strlen("£") == 2. std::string sL = std::format("{:*<3}", "£"); @@ -517,7 +522,6 @@ test_unicode() std::string sP = std::format("{:1.1} {:*<1.1}", "£", "🤡"); VERIFY( sP == "£ *" ); sP = std::format("{:*<2.1} {:*<2.1}", "£", "🤡"); - VERIFY( sP == "£* **" ); // Verify field width handling for extended grapheme clusters, // and that a cluster gets output as a single item, not truncated. -- 2.48.1