vangyzen created this revision.
vangyzen added a subscriber: cfe-commits.
Herald added a subscriber: emaste.
Herald added a reviewer: EricWF.

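numpunct_byname<wchar_t> assumed that decimal_point and thousands_sep
were single-byte (ASCII) strings and simply copied their first byte.
Add support for multibyte strings in these fields: convert each one
with mbrtowc in the named locale, and report a runtime error if the
field is not exactly one valid multibyte character.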

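I found this problem on FreeBSD 11, where thousands_sep in fr_FR.UTF-8
is a no-break space (U+00A0).  In UTF-8 that character is the two-byte
sequence 0xC2 0xA0, so copying only the first byte produced the wrong
wide character.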


https://reviews.llvm.org/D27167

Files:
  src/locale.cpp


Index: src/locale.cpp
===================================================================
--- src/locale.cpp
+++ src/locale.cpp
@@ -4281,23 +4281,51 @@
 {
 }
 
+#if defined(__clang__)
+#pragma clang diagnostic ignored "-Wmissing-braces"
+#endif
+
 void
 numpunct_byname<wchar_t>::__init(const char* nm)
 {
     if (strcmp(nm, "C") != 0)
     {
         __locale_unique_ptr loc(newlocale(LC_ALL_MASK, nm, 0), freelocale);
         if (loc == nullptr)
-            __throw_runtime_error("numpunct_byname<char>::numpunct_byname"
-                                " failed to construct for " + string(nm));
+            __throw_runtime_error("numpunct_byname<wchar_t>::numpunct_byname"
+                                  " failed to construct for " + string(nm));
 
         lconv* lc = __libcpp_localeconv_l(loc.get());
-        if (*lc->decimal_point)
-            __decimal_point_ = *lc->decimal_point;
-        if (*lc->thousands_sep)
-            __thousands_sep_ = *lc->thousands_sep;
+        if (*lc->decimal_point) {
+            size_t len = strlen(lc->decimal_point);
+            mbstate_t mbs = {0};
+            wchar_t wc;
+            size_t nb = __libcpp_mbrtowc_l(&wc, lc->decimal_point, len, &mbs,
+                loc.get());
+            if (nb == len) {
+                __decimal_point_ = wc;
+            } else {
+                __throw_runtime_error("numpunct_byname<wchar_t>: decimal_point"
+                                      " is not a valid multibyte character: " +
+                                      string(lc->decimal_point));
+            }
+        }
+        if (*lc->thousands_sep) {
+            size_t len = strlen(lc->thousands_sep);
+            mbstate_t mbs = {0};
+            wchar_t wc;
+            size_t nb = __libcpp_mbrtowc_l(&wc, lc->thousands_sep, len, &mbs,
+                loc.get());
+            if (nb == len) {
+                __thousands_sep_ = wc;
+            } else {
+                __throw_runtime_error("numpunct_byname<wchar_t>: thousands_sep"
+                                      " is not a valid multibyte character: " +
+                                      string(lc->thousands_sep));
+            }
+        }
         __grouping_ = lc->grouping;
-        // locallization for truename and falsename is not available
+        // localization for truename and falsename is not available
     }
 }
 
@@ -4861,10 +4889,6 @@
     return result;
 }
 
-#if defined(__clang__)
-#pragma clang diagnostic ignored "-Wmissing-braces"
-#endif
-
 template <>
 wstring
 __time_get_storage<wchar_t>::__analyze(char fmt, const ctype<wchar_t>& ct)


Index: src/locale.cpp
===================================================================
--- src/locale.cpp
+++ src/locale.cpp
@@ -4281,23 +4281,51 @@
 {
 }
 
+#if defined(__clang__)
+#pragma clang diagnostic ignored "-Wmissing-braces"
+#endif
+
 void
 numpunct_byname<wchar_t>::__init(const char* nm)
 {
     if (strcmp(nm, "C") != 0)
     {
         __locale_unique_ptr loc(newlocale(LC_ALL_MASK, nm, 0), freelocale);
         if (loc == nullptr)
-            __throw_runtime_error("numpunct_byname<char>::numpunct_byname"
-                                " failed to construct for " + string(nm));
+            __throw_runtime_error("numpunct_byname<wchar_t>::numpunct_byname"
+                                  " failed to construct for " + string(nm));
 
         lconv* lc = __libcpp_localeconv_l(loc.get());
-        if (*lc->decimal_point)
-            __decimal_point_ = *lc->decimal_point;
-        if (*lc->thousands_sep)
-            __thousands_sep_ = *lc->thousands_sep;
+        if (*lc->decimal_point) {
+            size_t len = strlen(lc->decimal_point);
+            mbstate_t mbs = {0};
+            wchar_t wc;
+            size_t nb = __libcpp_mbrtowc_l(&wc, lc->decimal_point, len, &mbs,
+                loc.get());
+            if (nb == len) {
+                __decimal_point_ = wc;
+            } else {
+                __throw_runtime_error("numpunct_byname<wchar_t>: decimal_point"
+                                      " is not a valid multibyte character: " +
+                                      string(lc->decimal_point));
+            }
+        }
+        if (*lc->thousands_sep) {
+            size_t len = strlen(lc->thousands_sep);
+            mbstate_t mbs = {0};
+            wchar_t wc;
+            size_t nb = __libcpp_mbrtowc_l(&wc, lc->thousands_sep, len, &mbs,
+                loc.get());
+            if (nb == len) {
+                __thousands_sep_ = wc;
+            } else {
+                __throw_runtime_error("numpunct_byname<wchar_t>: thousands_sep"
+                                      " is not a valid multibyte character: " +
+                                      string(lc->thousands_sep));
+            }
+        }
         __grouping_ = lc->grouping;
-        // locallization for truename and falsename is not available
+        // localization for truename and falsename is not available
     }
 }
 
@@ -4861,10 +4889,6 @@
     return result;
 }
 
-#if defined(__clang__)
-#pragma clang diagnostic ignored "-Wmissing-braces"
-#endif
-
 template <>
 wstring
 __time_get_storage<wchar_t>::__analyze(char fmt, const ctype<wchar_t>& ct)
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to