Hi
Here is a revisited version. I finally go without compiling in
C++11 as I prefer to use __builtin_alloca instead of using
std::unique_ptr and heap memory. I also realized that I could use
codecvt::max_length to know the max number of multibytes.
Tested under Linux x86_64 with all locales installed. There are
some failures but not coming from this patch, will surely be the subject
of another mail.
2014-11-28 François Dumont <fdum...@gcc.gnu.org>
DR libstdc++/13631
* include/bits/codecvt.h (__get_c_locale): New.
* config/locale/gnu/messages_member.h, messages_member.cc: Prefer
dgettext usage to gettext to allow usage of several catalogs at the
same time. Add an internal cache to make catalog names to catalog ids.
Add charset management.
* testsuite/22_locale/messages/13631.cc: New.
* testsuite/22_locale/messages/members/char/2.cc: Use also fr_FR locale
for charset conversion to get the expected accentuated character.
Do we need any abi tag ? Do we need to wait ?
François
On 24/11/2014 01:23, Jonathan Wakely wrote:
On 24/11/14 00:13 +0100, François Dumont wrote:
Hello
As we are at doing some evolution in the ABI I would like to take
the opportunity to merge branch libstdcxx_so_7-2. The first fix was
I don't think we want to merge everything, but it's certainly worth
looking to see if there are some fixes on that branch worth taking.
It would have been better to do during stage 1 though :-\
about a messages facet issue. So here is the version for the trunk
which is the one from the branch plus management of the charset part.
This way messages<wchar_t> works too.
There are still some uncovered points in this patch:
- I had to make codecvt _M_c_locale_codecvt public to access it from
the messages facet code. How do you want to handle it properly ? A
friend function to access it or do I make messages facet friend ?
- I haven't use the ABI tag yet. I know that there is a plan to tag
locale facets, will it work for what I am doing ?
Unless I'm missing something you're not making any ABI changes to
std::messages, just making the definitiosn of some functions no longer
inline.
Note that I could use std::tuple instead of combination of
std::pair and std::unique_ptr instead of wchar_buffer if we were
building it in C++11 mode. Is it possible ?
Yes, the symlink to the messages_members.cc file would need to be
moved from src/c++98/Makefile.am to src/c++11/Makefile.am
Index: include/bits/locale_facets_nonio.h
===================================================================
--- include/bits/locale_facets_nonio.h (revision 217816)
+++ include/bits/locale_facets_nonio.h (working copy)
@@ -1842,22 +1842,6 @@
*/
virtual void
do_close(catalog) const;
-
- // Returns a locale and codeset-converted string, given a
char* message.
- char*
- _M_convert_to_char(const string_type& __msg) const
- {
- // XXX
- return reinterpret_cast<char*>(const_cast<_CharT*>(__msg.c_str()));
- }
-
- // Returns a locale and codeset-converted string, given a
char* message.
- string_type
- _M_convert_from_char(char*) const
- {
- // XXX
- return string_type();
- }
};
Those members are used by the ieee_1003.1-2001 locale.
Index: config/locale/gnu/messages_members.cc
===================================================================
--- config/locale/gnu/messages_members.cc (revision 218027)
+++ config/locale/gnu/messages_members.cc (working copy)
@@ -31,54 +31,272 @@
#include <locale>
#include <bits/c++locale_internal.h>
+#include <algorithm>
+#include <utility>
+
+#include <ext/concurrence.h>
+
+namespace
+{
+ using namespace std;
+
+ class Catalogs
+ {
+ public:
+ typedef messages_base::catalog catalog_id;
+
+ struct catalog_info
+ {
+ catalog_info()
+ { }
+
+ catalog_info(catalog_id __id, const char* __domain, locale __loc)
+ : _M_id(__id), _M_domain(__domain), _M_locale(__loc)
+ { }
+
+ catalog_id _M_id;
+ const char* _M_domain;
+ locale _M_locale;
+ };
+
+ typedef pair<const char*, locale> result_type;
+
+ Catalogs() : _M_counter(0), _M_nb_entry(0) { }
+
+ ~Catalogs()
+ {
+ if (_M_nb_entry)
+ {
+ for (size_t i = 0; i != _M_nb_entry; ++i)
+ delete[] _M_map[i]._M_domain;
+ delete[] _M_map;
+ }
+ }
+
+ catalog_id
+ _M_add(const string& __domain, locale __l)
+ {
+ __gnu_cxx::__scoped_lock lock(_M_mutex);
+
+ catalog_info* __new_map = new catalog_info[_M_nb_entry + 1];
+ __try
+ {
+ copy(_M_map, _M_map + _M_nb_entry, __new_map);
+ char* __domain_copy = new char[__domain.size() + 1];
+ __domain.copy(__domain_copy, __domain.size());
+ __domain_copy[__domain.size()] = 0;
+ __new_map[_M_nb_entry] = catalog_info(_M_counter, __domain_copy, __l);
+ }
+ __catch(...)
+ {
+ delete[] __new_map;
+ __throw_exception_again;
+ }
+
+ // The counter is not likely to roll unless catalogs keep on being
+ // open/close which is consider as an application mistake for the moment.
+ catalog_id __cat = _M_counter++;
+ delete[] _M_map;
+ _M_map = __new_map;
+ ++_M_nb_entry;
+
+ return __cat;
+ }
+
+ void
+ _M_erase(catalog_id __c)
+ {
+ __gnu_cxx::__scoped_lock lock(_M_mutex);
+
+ catalog_info* __entry =
+ lower_bound(_M_map, _M_map + _M_nb_entry, __c, _Comp());
+ if (__entry == _M_map + _M_nb_entry || __entry->_M_id != __c)
+ return;
+
+ catalog_info* __new_map =
+ _M_nb_entry > 1 ? new catalog_info[_M_nb_entry - 1] : 0;
+ copy(__entry + 1, _M_map + _M_nb_entry,
+ copy(_M_map, __entry, __new_map));
+
+ delete[] __entry->_M_domain;
+ delete[] _M_map;
+ _M_map = __new_map;
+ --_M_nb_entry;
+ }
+
+ result_type
+ _M_get(catalog_id __c) const
+ {
+ __gnu_cxx::__scoped_lock lock(_M_mutex);
+
+ const catalog_info* __entry =
+ lower_bound(_M_map, _M_map + _M_nb_entry, __c, _Comp());
+ if (__entry != _M_map + _M_nb_entry && __entry->_M_id == __c)
+ return result_type(__entry->_M_domain, __entry->_M_locale);
+ return result_type(0, locale());
+ }
+
+ private:
+ struct _Comp
+ {
+ bool operator()(catalog_id __cat, const catalog_info& __info) const
+ { return __cat < __info._M_id; }
+
+ bool operator()(const catalog_info& __info, catalog_id __cat) const
+ { return __info._M_id < __cat; }
+ };
+
+ mutable __gnu_cxx::__mutex _M_mutex;
+ catalog_id _M_counter;
+ catalog_info* _M_map;
+ size_t _M_nb_entry;
+ };
+
+ Catalogs&
+ get_catalogs()
+ {
+ static Catalogs __catalogs;
+ return __catalogs;
+ }
+
+ const char*
+ get_glibc_msg(__c_locale __attribute__((unused)) __locale_messages,
+ const char* __domainname,
+ const char* __dfault)
+ {
+#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
+ std::__c_locale __old = __uselocale(__locale_messages);
+ const char* __msg =
+ const_cast<const char*>(dgettext(__domainname, __dfault));
+ __uselocale(__old);
+#else
+ char* __old = setlocale(LC_ALL, 0);
+ const size_t __len = strlen(__old) + 1;
+ char* __sav = new char[__len];
+ memcpy(__sav, __old, __len);
+ setlocale(LC_ALL, _M_name_messages);
+ const char* __msg = dgettext(__domainname, __dfault);
+ setlocale(LC_ALL, __sav);
+ delete [] __sav;
+#endif
+
+ return __msg;
+ }
+}
+
namespace std _GLIBCXX_VISIBILITY(default)
{
-_GLIBCXX_BEGIN_NAMESPACE_VERSION
+ _GLIBCXX_BEGIN_NAMESPACE_VERSION
// Specializations.
template<>
+ typename messages<char>::catalog
+ messages<char>::do_open(const basic_string<char>& __s,
+ const locale& __l) const
+ {
+ typedef codecvt<char, char, mbstate_t> __codecvt_t;
+ const __codecvt_t& __codecvt = use_facet<__codecvt_t>(__l);
+
+ bind_textdomain_codeset(__s.c_str(),
+ __nl_langinfo_l(CODESET, __get_c_locale(__codecvt)));
+ return get_catalogs()._M_add(__s, __l);
+ }
+
+ template<>
+ void
+ messages<char>::do_close(catalog __c) const
+ { get_catalogs()._M_erase(__c); }
+
+ template<>
string
- messages<char>::do_get(catalog, int, int, const string& __dfault) const
+ messages<char>::do_get(catalog __c, int, int,
+ const string& __dfault) const
{
-#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
- __c_locale __old = __uselocale(_M_c_locale_messages);
- const char* __msg = const_cast<const char*>(gettext(__dfault.c_str()));
- __uselocale(__old);
- return string(__msg);
-#else
- char* __old = setlocale(LC_ALL, 0);
- const size_t __len = strlen(__old) + 1;
- char* __sav = new char[__len];
- memcpy(__sav, __old, __len);
- setlocale(LC_ALL, _M_name_messages);
- const char* __msg = gettext(__dfault.c_str());
- setlocale(LC_ALL, __sav);
- delete [] __sav;
- return string(__msg);
-#endif
+ if (__c < 0)
+ return __dfault;
+
+ Catalogs::result_type __cat_info = get_catalogs()._M_get(__c);
+
+ if (!__cat_info.first)
+ return __dfault;
+
+ return get_glibc_msg(_M_c_locale_messages, __cat_info.first,
+ __dfault.c_str());
}
#ifdef _GLIBCXX_USE_WCHAR_T
template<>
+ typename messages<wchar_t>::catalog
+ messages<wchar_t>::do_open(const basic_string<char>& __s,
+ const locale& __l) const
+ {
+ typedef codecvt<wchar_t, char, mbstate_t> __codecvt_t;
+ const __codecvt_t& __codecvt = use_facet<__codecvt_t>(__l);
+
+ bind_textdomain_codeset(__s.c_str(),
+ __nl_langinfo_l(CODESET, __get_c_locale(__codecvt)));
+
+ return get_catalogs()._M_add(__s, __l);
+ }
+
+ template<>
+ void
+ messages<wchar_t>::do_close(catalog __c) const
+ { get_catalogs()._M_erase(__c); }
+
+ template<>
wstring
- messages<wchar_t>::do_get(catalog, int, int, const wstring& __dfault) const
+ messages<wchar_t>::do_get(catalog __c, int, int,
+ const wstring& __wdfault) const
{
-# if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
- __c_locale __old = __uselocale(_M_c_locale_messages);
- char* __msg = gettext(_M_convert_to_char(__dfault));
- __uselocale(__old);
- return _M_convert_from_char(__msg);
-# else
- char* __old = setlocale(LC_ALL, 0);
- const size_t __len = strlen(__old) + 1;
- char* __sav = new char[__len];
- memcpy(__sav, __old, __len);
- setlocale(LC_ALL, _M_name_messages);
- char* __msg = gettext(_M_convert_to_char(__dfault));
- setlocale(LC_ALL, __sav);
- delete [] __sav;
- return _M_convert_from_char(__msg);
-# endif
+ if (__c < 0 || __wdfault.empty())
+ return __wdfault;
+
+ Catalogs::result_type __cat_info = get_catalogs()._M_get(__c);
+
+ if (!__cat_info.first)
+ return __wdfault;
+
+ typedef codecvt<wchar_t, char, mbstate_t> __codecvt_t;
+ const __codecvt_t& __conv =
+ use_facet<__codecvt_t>(__cat_info.second);
+
+ const char* __translation;
+ mbstate_t __state;
+ __builtin_memset(&__state, 0, sizeof(mbstate_t));
+ {
+ const wchar_t* __wdfault_next;
+ size_t __mb_size = __wdfault.size() * __conv.max_length();;
+ char* __dfault =
+ static_cast<char*>(__builtin_alloca(sizeof(char) * (__mb_size + 1)));
+ char* __dfault_next;
+ __conv.out(__state,
+ __wdfault.data(), __wdfault.data() + __wdfault.size(),
+ __wdfault_next,
+ __dfault, __dfault + __mb_size, __dfault_next);
+
+ // Make sure string passed to dgettext is \0 terminated.
+ *__dfault_next = '\0';
+ __translation
+ = get_glibc_msg(_M_c_locale_messages, __cat_info.first, __dfault);
+
+ // If we end up getting default value back we can simply return original
+ // default value.
+ if (__translation == __dfault)
+ return __wdfault;
+ }
+
+ __builtin_memset(&__state, 0, sizeof(mbstate_t));
+ size_t __size = __builtin_strlen(__translation);
+ const char* __translation_next;
+ wchar_t* __wtranslation =
+ static_cast<wchar_t*>(__builtin_alloca(sizeof(wchar_t) * (__size + 1)));
+ wchar_t* __wtranslation_next;
+ __conv.in(__state, __translation, __translation + __size,
+ __translation_next,
+ __wtranslation, __wtranslation + __size,
+ __wtranslation_next);
+ return wstring(__wtranslation, __wtranslation_next);
}
#endif
Index: config/locale/gnu/messages_members.h
===================================================================
--- config/locale/gnu/messages_members.h (revision 218027)
+++ config/locale/gnu/messages_members.h (working copy)
@@ -83,22 +83,6 @@
_S_destroy_c_locale(_M_c_locale_messages);
}
- template<typename _CharT>
- typename messages<_CharT>::catalog
- messages<_CharT>::do_open(const basic_string<char>& __s,
- const locale&) const
- {
- // No error checking is done, assume the catalog exists and can
- // be used.
- textdomain(__s.c_str());
- return 0;
- }
-
- template<typename _CharT>
- void
- messages<_CharT>::do_close(catalog) const
- { }
-
// messages_byname
template<typename _CharT>
messages_byname<_CharT>::messages_byname(const char* __s, size_t __refs)
@@ -126,5 +110,25 @@
}
}
+ template<>
+ typename messages<char>::catalog
+ messages<char>::do_open(const basic_string<char>&,
+ const locale&) const;
+
+ template<>
+ void
+ messages<char>::do_close(catalog) const;
+
+#ifdef _GLIBCXX_USE_WCHAR_T
+ template<>
+ typename messages<wchar_t>::catalog
+ messages<wchar_t>::do_open(const basic_string<char>&,
+ const locale&) const;
+
+ template<>
+ void
+ messages<wchar_t>::do_close(catalog) const;
+#endif
+
_GLIBCXX_END_NAMESPACE_VERSION
} // namespace
Index: include/bits/codecvt.h
===================================================================
--- include/bits/codecvt.h (revision 218027)
+++ include/bits/codecvt.h (working copy)
@@ -263,8 +263,6 @@
do_max_length() const throw() = 0;
};
-
-
/**
* @brief Primary class template codecvt.
* @ingroup locales
@@ -390,6 +388,10 @@
virtual int
do_max_length() const throw();
+
+ friend __c_locale
+ __get_c_locale(const codecvt& __codecvt)
+ { return __codecvt._M_c_locale_codecvt; }
};
#ifdef _GLIBCXX_USE_WCHAR_T
@@ -450,6 +452,10 @@
virtual int
do_max_length() const throw();
+
+ friend __c_locale
+ __get_c_locale(const codecvt& __codecvt)
+ { return __codecvt._M_c_locale_codecvt; }
};
#endif //_GLIBCXX_USE_WCHAR_T
Index: testsuite/22_locale/messages/13631.cc
===================================================================
--- testsuite/22_locale/messages/13631.cc (revision 0)
+++ testsuite/22_locale/messages/13631.cc (working copy)
@@ -0,0 +1,99 @@
+// { dg-require-namedlocale "fr_FR" }
+
+// Copyright (C) 2014 Free Software Foundation
+//
+// This file is part of the GNU ISO C++ Library. This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3. If not see
+// <http://www.gnu.org/licenses/>.
+
+#include <locale>
+#include <testsuite_hooks.h>
+
+void test01()
+{
+ bool test __attribute__((unused)) = true;
+
+ // This is defined through CXXFLAGS in scripts/testsuite_flags[.in].
+ const char* dir = LOCALEDIR;
+
+ std::locale l("fr_FR");
+
+ typedef std::messages<char> messages;
+
+ const messages &msgs_facet = std::use_facet<messages>(l);
+
+ messages::catalog msgs = msgs_facet.open("libstdc++", l, dir);
+ VERIFY( msgs >= 0 );
+
+ const char msgid[] = "please";
+ std::string translation1 = msgs_facet.get(msgs, 0, 0, msgid);
+
+ // Without a real translation this test doesn't mean anything:
+ VERIFY( translation1 != msgid );
+
+ // Opening an other catalog was enough to show the problem, even a fake
+ // catalog.
+ messages::catalog fake_msgs = msgs_facet.open("fake", l);
+
+ std::string translation2 = msgs_facet.get(msgs, 0, 0, msgid);
+
+ // Close catalogs before doing the check to avoid leaks.
+ msgs_facet.close(fake_msgs);
+ msgs_facet.close(msgs);
+
+ VERIFY( translation1 == translation2 );
+}
+
+void test02()
+{
+ bool test __attribute__((unused)) = true;
+
+ // This is defined through CXXFLAGS in scripts/testsuite_flags[.in].
+ const char* dir = LOCALEDIR;
+
+ std::locale l("fr_FR");
+
+ typedef std::messages<wchar_t> messages;
+
+ const messages &msgs_facet = std::use_facet<messages>(l);
+
+ messages::catalog msgs = msgs_facet.open("libstdc++", l, dir);
+ VERIFY( msgs >= 0 );
+
+ const wchar_t msgid[] = L"please";
+ std::wstring translation1 = msgs_facet.get(msgs, 0, 0, msgid);
+
+ // Without a real translation this test doesn't mean anything:
+ VERIFY( !translation1.empty() );
+ VERIFY( translation1 != msgid );
+
+ // Opening an other catalog was enough to show the problem, even a fake
+ // catalog.
+ messages::catalog fake_msgs = msgs_facet.open("fake", l);
+
+ std::wstring translation2 = msgs_facet.get(msgs, 0, 0, msgid);
+
+ // Close catalogs before doing the check to avoid leaks.
+ msgs_facet.close(fake_msgs);
+ msgs_facet.close(msgs);
+
+ VERIFY( translation1 == translation2 );
+}
+
+int main()
+{
+ test01();
+ test02();
+ return 0;
+}
Index: testsuite/22_locale/messages/members/char/2.cc
===================================================================
--- testsuite/22_locale/messages/members/char/2.cc (revision 218027)
+++ testsuite/22_locale/messages/members/char/2.cc (working copy)
@@ -35,9 +35,8 @@
const char* dir = LOCALEDIR;
// basic construction
- locale loc_c = locale::classic();
locale loc_fr = locale("fr_FR");
- VERIFY( loc_c != loc_fr );
+ VERIFY( locale::classic() != loc_fr );
// cache the messages facets
const messages<char>& mssg_fr = use_facet<messages<char> >(loc_fr);
@@ -47,7 +46,7 @@
// void close(catalog) const;
// Check French (fr_FR) locale.
- catalog cat_fr = mssg_fr.open("libstdc++", loc_c, dir);
+ catalog cat_fr = mssg_fr.open("libstdc++", loc_fr, dir);
string s01 = mssg_fr.get(cat_fr, 0, 0, "please");
string s02 = mssg_fr.get(cat_fr, 0, 0, "thank you");
VERIFY ( s01 == "s'il vous plaît" );