On Thu, 3 Apr 2025 at 13:55, Tomasz Kaminski <tkami...@redhat.com> wrote: > > > > On Thu, Apr 3, 2025 at 2:49 PM Jonathan Wakely <jwak...@redhat.com> wrote: >> >> On Thu, 3 Apr 2025 at 10:24, Jonathan Wakely <jwak...@redhat.com> wrote: >> > >> > On Thu, 3 Apr 2025 at 09:55, Tomasz Kamiński <tkami...@redhat.com> wrote: >> > > >> > > This patch corrects handling of UTF-32LE and UTF32-BE in >> > > __unicode::__literal_encoding_is_unicode<_CharT>, so they are >> > > recognized as unicode and functions produces correct result for wchar_t. >> > > >> > > Use `__unicode::__field_width` to compute the estimated witdh >> > >> > "width" >> > >> > > of the charcter for unicode wide encoding. >> > >> > "character" >> > >> > > >> > > PR libstdc++-v3/119593 >> > > >> > > libstdc++-v3/ChangeLog: >> > > >> > > * include/bits/unicode.h >> > > (__unicode::__literal_encoding_is_unicode<_CharT>): >> > > Corrected handing for UTF-16 and UTF-32 with "LE" or "BE" suffix. >> > > * include/std/format (__formatter_str::_S_character_width): >> > > Define. >> > > (__formatter_str::_S_character_width): Updated passed char >> > > length. >> > > * testsuite/std/format/functions/format.cc: Test for wchar_t. >> > > --- >> > > Testing on x86_64-linux. OK for trunk? >> > > I believe we should backport it, given that all wchar_t uses are >> > > impacted. >> > > >> > > libstdc++-v3/include/bits/unicode.h | 2 ++ >> > > libstdc++-v3/include/std/format | 15 ++++++++++++++- >> > > .../testsuite/std/format/functions/format.cc | 8 ++++++-- >> > > 3 files changed, 22 insertions(+), 3 deletions(-) >> > > >> > > diff --git a/libstdc++-v3/include/bits/unicode.h >> > > b/libstdc++-v3/include/bits/unicode.h >> > > index 24b1ac3d53d..99d972eccff 100644 >> > > --- a/libstdc++-v3/include/bits/unicode.h >> > > +++ b/libstdc++-v3/include/bits/unicode.h >> > > @@ -1039,6 +1039,8 @@ inline namespace __v16_0_0 >> > > string_view __s(__enc); >> > > if (__s.ends_with("//")) >> > > __s.remove_suffix(2); >> > > + if (__s.ends_with("LE") || __s.ends_with("BE")) >> > > + __s.remove_suffix(2); >> > > return __s == "16" || __s == "32"; >> > > } >> > > } >> > > diff --git a/libstdc++-v3/include/std/format >> > > b/libstdc++-v3/include/std/format >> > > index c3327e1d384..603facc51de 100644 >> > > --- a/libstdc++-v3/include/std/format >> > > +++ b/libstdc++-v3/include/std/format >> > > @@ -1277,12 +1277,25 @@ namespace __format >> > > _M_spec); >> > > } >> > > >> > >> > Please put [[__gnu__::__always_inline__]] on this function, so that it >> > doesn't add any overhead for narrow chars: >> > >> > > + static size_t >> > > + _S_character_width(_CharT __c) >> > > + { >> > > + using __unicode::__literal_encoding_is_unicode; >> > > + // N.B. single byte cannot encode charcter of width greater than >> > > 1 >> > > + if (sizeof(_CharT) > 1u && >> > > __literal_encoding_is_unicode<_CharT>()) >> > >> > I think this can be 'if constexpr' >> > >> > OK for trunk and gcc-14 with those changes, thanks. >> > (No backport for gcc-13 because it doesn't have the Unicode-aware >> > field width support.) >> > >> > > + return __unicode::__field_width(__c); >> > > + else >> > > + return 1u; >> > > + } >> > > + >> > > template<typename _Out> >> > > typename basic_format_context<_Out, _CharT>::iterator >> > > _M_format_character(_CharT __c, >> > > basic_format_context<_Out, _CharT>& __fc) const >> > > { >> > > - return __format::__write_padded_as_spec({&__c, 1u}, 1, __fc, >> > > _M_spec); >> > > + return __format::__write_padded_as_spec({&__c, 1u}, >> > > + >> > > _S_character_width(__c), >> > > + __fc, _M_spec); >> > > } >> > > >> > > template<typename _Int> >> > > diff --git a/libstdc++-v3/testsuite/std/format/functions/format.cc >> > > b/libstdc++-v3/testsuite/std/format/functions/format.cc >> > > index 7fc42017045..d8dbf463413 100644 >> > > --- a/libstdc++-v3/testsuite/std/format/functions/format.cc >> > > +++ b/libstdc++-v3/testsuite/std/format/functions/format.cc >> > > @@ -501,9 +501,14 @@ test_unicode() >> > > { >> > > // Similar to sC example in test_std_examples, but not from the >> > > standard. >> > > // Verify that the character "🤡" has estimated field width 2, >> > > - // rather than estimated field width equal to strlen("🤡"), which >> > > would be 4. >> > > + // rather than estimated field width equal to strlen("🤡"), which >> > > would be 4, >> > > + // or just width 1 for single character. >> > > std::string sC = std::format("{:*<3}", "🤡"); >> > > VERIFY( sC == "🤡*" ); >> > > + std::wstring wsC = std::format(L"{:*<3}", L"🤡"); >> > > + VERIFY( wsC == L"🤡*" ); >> > > + wsC = std::format(L"{:*<3}", L'🤡'); >> > > + VERIFY( wsC == L"🤡*" ); >> > > >> > > // Verify that "£" has estimated field width 1, not strlen("£") == 2. >> > > std::string sL = std::format("{:*<3}", "£"); >> > > @@ -517,7 +522,6 @@ test_unicode() >> > > std::string sP = std::format("{:1.1} {:*<1.1}", "£", "🤡"); >> > > VERIFY( sP == "£ *" ); >> > > sP = std::format("{:*<2.1} {:*<2.1}", "£", "🤡"); >> > > - VERIFY( sP == "£* **" ); >> >> I didn't notice at first that this line has been removed, was that an >> accident? > > Yes, it was an accident. Unfortunately I just lande
Yes, please - thanks!