Enrico Forestieri wrote:

> On Tue, Oct 08, 2013 at 09:49:44PM +0200, Vincent van Ravesteijn wrote:
>> 
>> I tried to use QThreadStorage, which works on Windows, but on Linux
>> I keep getting segmentation faults.

Vincent, I extended your patch to protect all IconvProcessor instances. 
Thanks to Enricos documentation links I am now also sure that the mutex in 
convert() is not needed, so I removed it. Using thread-local storage has 
also the advantage that we can share one IconvProcessor instance per thread 
for each encoding pair. I believe that this patch makes all iconv 
conversions 100% thread-safe. I did not see any segfault on Debian Wheezy, 
and I could not get any wrong conversion. What do others think?

> I tried your patch on Cygwin, Solaris, and Debian Linux. It worked fine
> in all cases. However, and this occurs *only* on Debian, on quitting lyx
> I get:
> 
> lyx: QObject::startTimer: QTimer can only be used with threads started
> with QThread SIGSEGV signal caught!
> Sorry, you have found a bug in LyX, hope you have not lost any data.
> Please read the bug-reporting instructions in 'Help->Introduction' and
> send us a bug report, if necessary. Thanks!

Release or debug version? Do you get it with the attached patch as well?

> The other difference I noticed is that on Cygwin and Solaris I get
> \renewcommand instead of \newcommand. That is, applying your patch,
> on Debian exporting from the GUI or from command line, produces the
> same result, but lyx segafaults on exit.
> On Cygwin and Solaris, exporting from the GUI changes \newcommand
> into \renewcommand, but lyx quits normally.
> The joy of multithreading, I suppose.

I am not sure. I would think that this is something else. I do not see this, 
but if I use the Czech GUI I see that \shortcut{undefined} is translated if 
exported from the GUI, and untranslated if exported from the command line. 
This should not happen.


Georg
diff --git a/src/support/docstring.cpp b/src/support/docstring.cpp
index 727e1f5..f44259d 100644
--- a/src/support/docstring.cpp
+++ b/src/support/docstring.cpp
@@ -69,14 +69,6 @@ string const to_ascii(docstring const & ucs4)
 }
 
 
-IconvProcessor & utf8ToUcs4()
-{
-	static IconvProcessor iconv(ucs4_codeset, "UTF-8");
-	return iconv;
-}
-
-
-
 void utf8_to_ucs4(string const & utf8, docstring & ucs4)
 {
 	size_t n = utf8.size();
diff --git a/src/support/unicode.cpp b/src/support/unicode.cpp
index 95415a5..343b8de 100644
--- a/src/support/unicode.cpp
+++ b/src/support/unicode.cpp
@@ -14,7 +14,8 @@
 
 #include "support/unicode.h"
 #include "support/debug.h"
-#include "support/mutex.h"
+
+#include <QThreadStorage>
 
 #include <iconv.h>
 
@@ -66,8 +67,6 @@ struct IconvProcessor::Impl
 	iconv_t cd;
 	string tocode_;
 	string fromcode_;
-
-	Mutex mutex_; // iconv() is not thread save, see #7240
 };
 
 
@@ -124,8 +123,6 @@ bool IconvProcessor::init()
 int IconvProcessor::convert(char const * buf, size_t buflen,
 		char * outbuf, size_t maxoutsize)
 {
-	Mutex::Locker lock(&pimpl_->mutex_);
-
 	if (buflen == 0)
 		return 0;
 
@@ -228,7 +225,10 @@ iconv_convert(IconvProcessor & processor, InType const * buf, size_t buflen)
 	char const * inbuf = reinterpret_cast<char const *>(buf);
 	size_t inbytesleft = buflen * sizeof(InType);
 
-	static std::vector<char> outbuf(32768);
+	static QThreadStorage<std::vector<char> *> static_outbuf;
+	if (!static_outbuf.hasLocalData())
+		static_outbuf.setLocalData(new std::vector<char>(32768));
+	std::vector<char> & outbuf = *static_outbuf.localData();
 	// The number of UCS4 code points in buf is at most inbytesleft.
 	// The output encoding will use at most
 	// max_encoded_bytes(pimpl_->tocode_) per UCS4 code point.
@@ -249,6 +249,15 @@ iconv_convert(IconvProcessor & processor, InType const * buf, size_t buflen)
 } // anon namespace
 
 
+IconvProcessor & utf8ToUcs4()
+{
+	static QThreadStorage<IconvProcessor *> processor;
+	if (!processor.hasLocalData())
+		processor.setLocalData(new IconvProcessor(ucs4_codeset, "UTF-8"));
+	return *processor.localData();
+}
+
+
 vector<char_type> utf8_to_ucs4(vector<char> const & utf8str)
 {
 	if (utf8str.empty())
@@ -261,32 +270,43 @@ vector<char_type> utf8_to_ucs4(vector<char> const & utf8str)
 vector<char_type>
 utf8_to_ucs4(char const * utf8str, size_t ls)
 {
-	static IconvProcessor processor(ucs4_codeset, "UTF-8");
-	return iconv_convert<char_type>(processor, utf8str, ls);
+	return iconv_convert<char_type>(utf8ToUcs4(), utf8str, ls);
 }
 
 
 vector<char_type>
 utf16_to_ucs4(unsigned short const * s, size_t ls)
 {
-	static IconvProcessor processor(ucs4_codeset, utf16_codeset);
-	return iconv_convert<char_type>(processor, s, ls);
+	static QThreadStorage<IconvProcessor *> processor;
+	if (!processor.hasLocalData())
+		processor.setLocalData(new IconvProcessor(ucs4_codeset, utf16_codeset));
+	return iconv_convert<char_type>(*processor.localData(), s, ls);
 }
 
 
 vector<unsigned short>
 ucs4_to_utf16(char_type const * s, size_t ls)
 {
-	static IconvProcessor processor(utf16_codeset, ucs4_codeset);
-	return iconv_convert<unsigned short>(processor, s, ls);
+	static QThreadStorage<IconvProcessor *> processor;
+	if (!processor.hasLocalData())
+		processor.setLocalData(new IconvProcessor(utf16_codeset, ucs4_codeset));
+	return iconv_convert<unsigned short>(*processor.localData(), s, ls);
+}
+
+
+IconvProcessor & ucs4ToUtf8()
+{
+	static QThreadStorage<IconvProcessor *> processor;
+	if (!processor.hasLocalData())
+		processor.setLocalData(new IconvProcessor("UTF-8", ucs4_codeset));
+	return *processor.localData();
 }
 
 
 vector<char>
 ucs4_to_utf8(char_type c)
 {
-	static IconvProcessor processor("UTF-8", ucs4_codeset);
-	return iconv_convert<char>(processor, &c, 1);
+	return iconv_convert<char>(ucs4ToUtf8(), &c, 1);
 }
 
 
@@ -303,15 +323,17 @@ ucs4_to_utf8(vector<char_type> const & ucs4str)
 vector<char>
 ucs4_to_utf8(char_type const * ucs4str, size_t ls)
 {
-	static IconvProcessor processor("UTF-8", ucs4_codeset);
-	return iconv_convert<char>(processor, ucs4str, ls);
+	return iconv_convert<char>(ucs4ToUtf8(), ucs4str, ls);
 }
 
 
 vector<char_type>
 eightbit_to_ucs4(char const * s, size_t ls, string const & encoding)
 {
-	static map<string, IconvProcessor> processors;
+	static QThreadStorage<map<string, IconvProcessor> *> static_processors;
+	if (!static_processors.hasLocalData())
+		static_processors.setLocalData(new map<string, IconvProcessor>);
+	map<string, IconvProcessor> & processors = *static_processors.localData();
 	if (processors.find(encoding) == processors.end()) {
 		IconvProcessor processor(ucs4_codeset, encoding.c_str());
 		processors.insert(make_pair(encoding, processor));
@@ -320,10 +342,23 @@ eightbit_to_ucs4(char const * s, size_t ls, string const & encoding)
 }
 
 
+namespace {
+
+map<string, IconvProcessor> & ucs4To8bitProcessors()
+{
+	static QThreadStorage<map<string, IconvProcessor> *> processors;
+	if (!processors.hasLocalData())
+		processors.setLocalData(new map<string, IconvProcessor>);
+	return *processors.localData();
+}
+
+}
+
+
 vector<char>
 ucs4_to_eightbit(char_type const * ucs4str, size_t ls, string const & encoding)
 {
-	static map<string, IconvProcessor> processors;
+	map<string, IconvProcessor> & processors(ucs4To8bitProcessors());
 	if (processors.find(encoding) == processors.end()) {
 		IconvProcessor processor(encoding.c_str(), ucs4_codeset);
 		processors.insert(make_pair(encoding, processor));
@@ -334,7 +369,7 @@ ucs4_to_eightbit(char_type const * ucs4str, size_t ls, string const & encoding)
 
 char ucs4_to_eightbit(char_type ucs4, string const & encoding)
 {
-	static map<string, IconvProcessor> processors;
+	map<string, IconvProcessor> & processors(ucs4To8bitProcessors());
 	map<string, IconvProcessor>::iterator it = processors.find(encoding);
 	if (it == processors.end()) {
 		IconvProcessor processor(encoding.c_str(), ucs4_codeset);
@@ -352,7 +387,10 @@ char ucs4_to_eightbit(char_type ucs4, string const & encoding)
 void ucs4_to_multibytes(char_type ucs4, vector<char> & out,
 	string const & encoding)
 {
-	static map<string, IconvProcessor> processors;
+	static QThreadStorage<map<string, IconvProcessor> *> static_processors;
+	if (!static_processors.hasLocalData())
+		static_processors.setLocalData(new map<string, IconvProcessor>);
+	map<string, IconvProcessor> & processors = *static_processors.localData();
 	map<string, IconvProcessor>::iterator it = processors.find(encoding);
 	if (it == processors.end()) {
 		IconvProcessor processor(encoding.c_str(), ucs4_codeset);
diff --git a/src/support/unicode.h b/src/support/unicode.h
index 6e83180..ddcd6c8 100644
--- a/src/support/unicode.h
+++ b/src/support/unicode.h
@@ -21,6 +21,27 @@
 
 namespace lyx {
 
+/**
+ * Wrapper for iconv(3).
+ *
+ * According to the POSIX standard, all specified functions are thread-safe,
+ * with some exceptions. The iconv() function is not listed as an exception:
+ * http://pubs.opengroup.org/onlinepubs/9699919799/xrat/V4_xsh_chap02.html#tag_22_02_09_09
+ * http://man7.org/linux/man-pages/man7/pthreads.7.html
+ *
+ * Therefore, you can use as many instances of this class in parallel as you
+ * like. However, you need to ensure that each instance is only used by one
+ * thread at any given time. If this condition is not met you get nasty
+ * mixtures of different thread data as in bug 7240.
+ *
+ * From a performance point of view it is best to use one static instance
+ * per thread for each in/out encoding pair. This can e.g. be achieved by
+ * using helpers for thread-local storage such as QThreadStorage or
+ * boost::thread_specific_ptr. A single static instance protected by a mutex
+ * would work as well, and might be preferrable for exotic encoding pairs.
+ * Creating local IconvProcessor instances should be avoided because of the
+ * overhead in iconv_open().
+ */
 class IconvProcessor
 {
 public:
@@ -51,6 +72,10 @@ private:
 	Impl * pimpl_;
 };
 
+/// Get the global IconvProcessor instance of the current thread for
+/// utf8->ucs4 conversions
+IconvProcessor & utf8ToUcs4();
+
 // A single codepoint conversion for utf8_to_ucs4 does not make
 // sense, so that function is left out.
 
@@ -66,6 +91,10 @@ std::vector<char_type> utf16_to_ucs4(unsigned short const * s, size_t ls);
 
 std::vector<unsigned short> ucs4_to_utf16(char_type const * s, size_t ls);
 
+/// Get the global IconvProcessor instance of the current thread for
+/// ucs4->utf8 conversions
+IconvProcessor & ucs4ToUtf8();
+
 // ucs4_to_utf8
 
 std::vector<char> ucs4_to_utf8(char_type c);

Reply via email to