If a locale's THOUSANDS_SEP or MON_THOUSANDS_SEP string is not a
single character we either need to narrow it to a single char or
ignore it (and therefore disable digit grouping for that facet).

        PR libstdc++/87642
        * config/locale/gnu/monetary_members.cc
        (moneypunct<char, true>::_M_initialize_moneypunct): Use
        __narrow_multibyte_chars to convert multibyte thousands separators
        to a single char.
        * config/locale/gnu/numeric_members.cc
        (numpunct<char>::_M_initialize_numpunct): Likewise.
        (__narrow_multibyte_chars): New function.

Tested x86_64-linux, committed to trunk.


commit a8278bf69de1e5f5191b5fd434084eac7db2a1cc
Author: Jonathan Wakely <jwak...@redhat.com>
Date:   Thu Oct 18 16:26:24 2018 +0100

    PR libstdc++/87642 handle multibyte thousands separators from libc
    
    If a locale's THOUSANDS_SEP or MON_THOUSANDS_SEP string is not a
    single character we either need to narrow it to a single char or
    ignore it (and therefore disable digit grouping for that facet).
    
            PR libstdc++/87642
            * config/locale/gnu/monetary_members.cc
            (moneypunct<char, true>::_M_initialize_moneypunct): Use
            __narrow_multibyte_chars to convert multibyte thousands separators
            to a single char.
            * config/locale/gnu/numeric_members.cc
            (numpunct<char>::_M_initialize_numpunct): Likewise.
            (__narrow_multibyte_chars): New function.

diff --git a/libstdc++-v3/config/locale/gnu/monetary_members.cc b/libstdc++-v3/config/locale/gnu/monetary_members.cc
index b3e7645385a..212c68dd501 100644
--- a/libstdc++-v3/config/locale/gnu/monetary_members.cc
+++ b/libstdc++-v3/config/locale/gnu/monetary_members.cc
@@ -207,6 +207,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   }
 #endif
 
+  extern char __narrow_multibyte_chars(const char* s, __locale_t cloc);
+
   template<>
     void
     moneypunct<char, true>::_M_initialize_moneypunct(__c_locale __cloc,
@@ -241,8 +243,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
          // Named locale.
          _M_data->_M_decimal_point = *(__nl_langinfo_l(__MON_DECIMAL_POINT,
                                                        __cloc));
-         _M_data->_M_thousands_sep = *(__nl_langinfo_l(__MON_THOUSANDS_SEP,
-                                                       __cloc));
+         const char* thousands_sep = __nl_langinfo_l(__MON_THOUSANDS_SEP,
+                                                     __cloc);
+         if (thousands_sep[0] != '\0' && thousands_sep[1] != '\0')
+           _M_data->_M_thousands_sep = __narrow_multibyte_chars(thousands_sep,
+                                                                __cloc);
+         else
+           _M_data->_M_thousands_sep = *thousands_sep;
 
          // Check for NULL, which implies no fractional digits.
          if (_M_data->_M_decimal_point == '\0')
diff --git a/libstdc++-v3/config/locale/gnu/numeric_members.cc b/libstdc++-v3/config/locale/gnu/numeric_members.cc
index 1ede8fadbd0..faa35777cf3 100644
--- a/libstdc++-v3/config/locale/gnu/numeric_members.cc
+++ b/libstdc++-v3/config/locale/gnu/numeric_members.cc
@@ -30,11 +30,62 @@
 
 #include <locale>
 #include <bits/c++locale_internal.h>
+#include <iconv.h>
 
 namespace std _GLIBCXX_VISIBILITY(default)
 {
 _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
+  extern char __narrow_multibyte_chars(const char* s, __locale_t cloc);
+
+// This file might be compiled twice, but we only want to define this once.
+#if ! _GLIBCXX_USE_CXX11_ABI
+  char // Pick a single char to stand in for the multibyte string s.
+  __narrow_multibyte_chars(const char* s, __locale_t cloc)
+  { // Returns '\0' when no single-char replacement is found.
+    const char* codeset = __nl_langinfo_l(CODESET, cloc); // used as iconv encoding name
+    if (!strcmp(codeset, "UTF-8"))
+      {
+       // Fast path for known UTF-8 separators; returns before any iconv call.
+       if (!strcmp(s, "\u202F")) // NARROW NO-BREAK SPACE
+         return ' ';
+       if (!strcmp(s, "\u2019")) // RIGHT SINGLE QUOTATION MARK
+         return '\'';
+       if (!strcmp(s, "\u066C")) // ARABIC THOUSANDS SEPARATOR
+         return '\'';
+      }
+
+    iconv_t cd = iconv_open("ASCII//TRANSLIT", codeset); // codeset -> ASCII
+    if (cd != (iconv_t)-1)
+      {
+       char c1; // receives the ASCII result of the first conversion
+       size_t inbytesleft = strlen(s);
+       size_t outbytesleft = 1; // room for exactly one output byte
+       char* inbuf = const_cast<char*>(s); // iconv takes a non-const char**
+       char* outbuf = &c1;
+       size_t n = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
+       iconv_close(cd);
+       if (n != (size_t)-1) // first conversion succeeded
+         {
+           cd = iconv_open(codeset, "ASCII"); // ASCII -> codeset, to convert c1 back
+           if (cd != (iconv_t)-1)
+             {
+               char c2; // receives c1 converted back to the locale's codeset
+               inbuf = &c1;
+               inbytesleft = 1;
+               outbuf = &c2;
+               outbytesleft = 1; // again only one output byte is allowed
+               n = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
+               iconv_close(cd);
+               if (n != (size_t)-1)
+                 return c2; // both conversions succeeded
+             }
+         }
+      }
+    return '\0'; // fallback: no fast-path match and no successful round trip
+  }
+#endif
+
   template<>
     void
     numpunct<char>::_M_initialize_numpunct(__c_locale __cloc)
@@ -63,8 +114,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
          // Named locale.
          _M_data->_M_decimal_point = *(__nl_langinfo_l(DECIMAL_POINT,
                                                        __cloc));
-         _M_data->_M_thousands_sep = *(__nl_langinfo_l(THOUSANDS_SEP,
-                                                       __cloc));
+         const char* thousands_sep = __nl_langinfo_l(THOUSANDS_SEP, __cloc);
+
+         if (thousands_sep[0] != '\0' && thousands_sep[1] != '\0')
+           _M_data->_M_thousands_sep = __narrow_multibyte_chars(thousands_sep,
+                                                                __cloc);
+         else
+           _M_data->_M_thousands_sep = *thousands_sep;
 
          // Check for NULL, which implies no grouping.
          if (_M_data->_M_thousands_sep == '\0')

Reply via email to