[gcc r14-11523] libstdc++: Fix handling of field width for wide strings and characters [PR119593]

Tomasz Kaminski via Libstdc++-cvs Fri, 04 Apr 2025 12:01:46 -0700

https://gcc.gnu.org/g:ad1b71fc2882c14271ebf2bbaf216cceaa88c76a


commit r14-11523-gad1b71fc2882c14271ebf2bbaf216cceaa88c76a
Author: Tomasz Kamiński <tkami...@redhat.com>
Date:   Thu Apr 3 10:23:45 2025 +0200

    libstdc++: Fix handling of field width for wide strings and characters 
[PR119593]
    
    This patch corrects handling of UTF-32LE and UTF-32BE in
    __unicode::__literal_encoding_is_unicode<_CharT>, so that they are
    recognized as Unicode and the function produces the correct result
    for wchar_t.
    
    Use `__unicode::__field_width` to compute the estimated width
    of the character for Unicode wide encodings.
    
            PR libstdc++/119593
    
    libstdc++-v3/ChangeLog:
    
            * include/bits/unicode.h
            (__unicode::__literal_encoding_is_unicode<_CharT>):
            Corrected handling for UTF-16 and UTF-32 with "LE" or "BE" suffix.
            * include/std/format (__formatter_str::_S_character_width):
            Define.
            (__formatter_str::_M_format_character): Updated passed char
            width.
            * testsuite/std/format/functions/format.cc: Test for wchar_t.
    
    Reviewed-by: Jonathan Wakely <jwak...@redhat.com>
    Signed-off-by: Tomasz Kamiński <tkami...@redhat.com>

Diff:
---
 libstdc++-v3/include/bits/unicode.h                   |  2 ++
 libstdc++-v3/include/std/format                       | 16 +++++++++++++++-
 libstdc++-v3/testsuite/std/format/functions/format.cc |  8 ++++++--
 3 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/libstdc++-v3/include/bits/unicode.h 
b/libstdc++-v3/include/bits/unicode.h
index 4b408948d722..eee3b7e37609 100644
--- a/libstdc++-v3/include/bits/unicode.h
+++ b/libstdc++-v3/include/bits/unicode.h
@@ -1039,6 +1039,8 @@ inline namespace __v15_1_0
              string_view __s(__enc);
              if (__s.ends_with("//"))
                __s.remove_suffix(2);
+             if (__s.ends_with("LE") || __s.ends_with("BE"))
+               __s.remove_suffix(2);
              return __s == "16" || __s == "32";
            }
        }
diff --git a/libstdc++-v3/include/std/format b/libstdc++-v3/include/std/format
index f64947a0e293..15bded87c9cd 100644
--- a/libstdc++-v3/include/std/format
+++ b/libstdc++-v3/include/std/format
@@ -1184,12 +1184,26 @@ namespace __format
                                                  _M_spec);
        }
 
+      [[__gnu__::__always_inline__]]
+      static size_t
+      _S_character_width(_CharT __c)
+      {
+       // N.B. single byte cannot encode character of width greater than 1
+       if constexpr (sizeof(_CharT) > 1u && 
+                       __unicode::__literal_encoding_is_unicode<_CharT>())
+         return __unicode::__field_width(__c);
+       else
+         return 1u;
+      }
+
       template<typename _Out>
        typename basic_format_context<_Out, _CharT>::iterator
        _M_format_character(_CharT __c,
                      basic_format_context<_Out, _CharT>& __fc) const
        {
-         return __format::__write_padded_as_spec({&__c, 1u}, 1, __fc, _M_spec);
+         return __format::__write_padded_as_spec({&__c, 1u},
+                                                 _S_character_width(__c),
+                                                 __fc, _M_spec);
        }
 
       template<typename _Int>
diff --git a/libstdc++-v3/testsuite/std/format/functions/format.cc 
b/libstdc++-v3/testsuite/std/format/functions/format.cc
index 78cc1ab482ad..97eb0957e5e1 100644
--- a/libstdc++-v3/testsuite/std/format/functions/format.cc
+++ b/libstdc++-v3/testsuite/std/format/functions/format.cc
@@ -497,9 +497,14 @@ test_unicode()
 {
   // Similar to sC example in test_std_examples, but not from the standard.
   // Verify that the character "🤡" has estimated field width 2,
-  // rather than estimated field width equal to strlen("🤡"), which would be 4.
+  // rather than estimated field width equal to strlen("🤡"), which would be 4,
+  // or just width 1 for single character.
   std::string sC = std::format("{:*<3}", "🤡");
   VERIFY( sC == "🤡*" );
+  std::wstring wsC = std::format(L"{:*<3}", L"🤡");
+  VERIFY( wsC == L"🤡*" );
+  wsC = std::format(L"{:*<3}", L'🤡');
+  VERIFY( wsC == L"🤡*" );
 
   // Verify that "£" has estimated field width 1, not strlen("£") == 2.
   std::string sL = std::format("{:*<3}", "£");
@@ -513,7 +518,6 @@ test_unicode()
   std::string sP = std::format("{:1.1} {:*<1.1}", "£", "🤡");
   VERIFY( sP == "£ *" );
   sP = std::format("{:*<2.1} {:*<2.1}", "£", "🤡");
-  VERIFY( sP == "£* **" );
 
   // Verify field width handling for extended grapheme clusters,
   // and that a cluster gets output as a single item, not truncated.

[gcc r14-11523] libstdc++: Fix handling of field width for wide strings and characters [PR119593]

Reply via email to