[PATCH] libstdc++: Fix handling of field width for wide strings and characters [PR119593]

Tomasz Kamiński Thu, 03 Apr 2025 08:28:27 -0700

This patch corrects handling of UTF-32LE and UTF-32BE in
__unicode::__literal_encoding_is_unicode<_CharT>, so they are
recognized as Unicode and the functions produce correct results for wchar_t.


Use `__unicode::__field_width` to compute the estimated width
of the character for Unicode wide encodings.

        PR libstdc++-v3/119593

libstdc++-v3/ChangeLog:

        * include/bits/unicode.h
        (__unicode::__literal_encoding_is_unicode<_CharT>):
        Corrected handling for UTF-16 and UTF-32 with "LE" or "BE" suffix.
        * include/std/format (__formatter_str::_S_character_width):
        Define.
        (__formatter_str::_M_format_character): Pass the estimated field
        width of the character instead of 1.
        * testsuite/std/format/functions/format.cc: Test for wchar_t.
---
Testing on x86_64-linux. OK for trunk?
I believe we should backport it, given that all wchar_t uses are
impacted.

 libstdc++-v3/include/bits/unicode.h               |  2 ++
 libstdc++-v3/include/std/format                   | 15 ++++++++++++++-
 .../testsuite/std/format/functions/format.cc      |  8 ++++++--
 3 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/libstdc++-v3/include/bits/unicode.h b/libstdc++-v3/include/bits/unicode.h
index 24b1ac3d53d..99d972eccff 100644
--- a/libstdc++-v3/include/bits/unicode.h
+++ b/libstdc++-v3/include/bits/unicode.h
@@ -1039,6 +1039,8 @@ inline namespace __v16_0_0
              string_view __s(__enc);
              if (__s.ends_with("//"))
                __s.remove_suffix(2);
+             if (__s.ends_with("LE") || __s.ends_with("BE"))
+               __s.remove_suffix(2);
              return __s == "16" || __s == "32";
            }
        }
diff --git a/libstdc++-v3/include/std/format b/libstdc++-v3/include/std/format
index c3327e1d384..603facc51de 100644
--- a/libstdc++-v3/include/std/format
+++ b/libstdc++-v3/include/std/format
@@ -1277,12 +1277,25 @@ namespace __format
                                                  _M_spec);
        }
 
+      static size_t
+      _S_character_width(_CharT __c)
+      {
+       using __unicode::__literal_encoding_is_unicode;
+       // N.B. single byte cannot encode character of width greater than 1
+       if (sizeof(_CharT) > 1u && __literal_encoding_is_unicode<_CharT>())
+         return __unicode::__field_width(__c);
+       else
+         return 1u;
+      }
+
       template<typename _Out>
        typename basic_format_context<_Out, _CharT>::iterator
        _M_format_character(_CharT __c,
                      basic_format_context<_Out, _CharT>& __fc) const
        {
-         return __format::__write_padded_as_spec({&__c, 1u}, 1, __fc, _M_spec);
+         return __format::__write_padded_as_spec({&__c, 1u},
+                                                 _S_character_width(__c),
+                                                 __fc, _M_spec);
        }
 
       template<typename _Int>
diff --git a/libstdc++-v3/testsuite/std/format/functions/format.cc b/libstdc++-v3/testsuite/std/format/functions/format.cc
index 7fc42017045..d8dbf463413 100644
--- a/libstdc++-v3/testsuite/std/format/functions/format.cc
+++ b/libstdc++-v3/testsuite/std/format/functions/format.cc
@@ -501,9 +501,14 @@ test_unicode()
 {
   // Similar to sC example in test_std_examples, but not from the standard.
   // Verify that the character "🤡" has estimated field width 2,
-  // rather than estimated field width equal to strlen("🤡"), which would be 4.
+  // rather than estimated field width equal to strlen("🤡"), which would be 4,
+  // or just width 1 for single character.
   std::string sC = std::format("{:*<3}", "🤡");
   VERIFY( sC == "🤡*" );
+  std::wstring wsC = std::format(L"{:*<3}", L"🤡");
+  VERIFY( wsC == L"🤡*" );
+  wsC = std::format(L"{:*<3}", L'🤡');
+  VERIFY( wsC == L"🤡*" );
 
   // Verify that "£" has estimated field width 1, not strlen("£") == 2.
   std::string sL = std::format("{:*<3}", "£");
@@ -517,7 +522,6 @@ test_unicode()
   std::string sP = std::format("{:1.1} {:*<1.1}", "£", "🤡");
   VERIFY( sP == "£ *" );
   sP = std::format("{:*<2.1} {:*<2.1}", "£", "🤡");
-  VERIFY( sP == "£* **" );
 
   // Verify field width handling for extended grapheme clusters,
   // and that a cluster gets output as a single item, not truncated.
-- 
2.48.1

[PATCH] libstdc++: Fix handling of field width for wide strings and characters [PR119593]

Reply via email to