After this patch I don't think it pays to fiddle a lot more with
unicode.C. Except moving away from iconv that is.

If you have the chance, please test this.

I will commit this quite soon.

Index: src/support/unicode.C
===================================================================
--- src/support/unicode.C	(revision 14966)
+++ src/support/unicode.C	(working copy)
@@ -20,10 +20,8 @@
 
 #include <cerrno>
 #include <iomanip>
-#include <string>
 
 using std::endl;
-using std::string;
 
 namespace {
 
@@ -35,36 +33,42 @@
 	char const * ucs2_codeset = "UCS-2LE";
 #endif
 
-std::vector<char>
-iconv_convert(std::string const & tocode, std::string const & fromcode,
-	      std::vector<char> const & buf)
-{
-	if (buf.empty())
-		return std::vector<char>();
-
-	iconv_t cd = iconv_open(tocode.c_str(), fromcode.c_str());
-	if (cd == (iconv_t)(-1)) {
-		lyxerr << "Error returned from iconv_open" << endl;
-		switch (errno) {
-		case EINVAL:
-			lyxerr << "EINVAL The conversion from " << fromcode
-			       << " to " << tocode
-			       << " is not supported by the implementation."
-			       << endl;
-			break;
-		default:
-			lyxerr << "\tSome other error: " << errno << endl;
-			break;
+template<typename RetType, typename InType>
+std::vector<RetType>
+iconv_convert(iconv_t * cd,
+	      char const * tocode,
+	      char const * fromcode,
+	      InType const * buf,
+	      size_t buflen)
+{
+	if (buflen == 0)
+		return std::vector<RetType>();
+
+	if (*cd == (iconv_t)(-1)) {
+		*cd = iconv_open(tocode, fromcode);
+		if (*cd == (iconv_t)(-1)) {
+			lyxerr << "Error returned from iconv_open" << endl;
+			switch (errno) {
+			case EINVAL:
+				lyxerr << "EINVAL The conversion from " << fromcode
+				       << " to " << tocode
+				       << " is not supported by the implementation."
+				       << endl;
+				break;
+			default:
+				lyxerr << "\tSome other error: " << errno << endl;
+				break;
+			}
 		}
 	}
 
-	char ICONV_CONST * inbuf = const_cast<char ICONV_CONST *>(&buf[0]);
-	size_t inbytesleft = buf.size();
+	char ICONV_CONST * inbuf = const_cast<char ICONV_CONST *>(reinterpret_cast<char const *>(buf));
+	size_t inbytesleft = buflen * sizeof(InType);
 	static char out[1000];
 	char * outbuf = out;
 	size_t outbytesleft = 1000;
 
-	size_t res = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
+	size_t res = iconv(*cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
 
 	if (res == (size_t)(-1)) {
 		lyxerr << "Error returned from iconv" << endl;
@@ -78,7 +82,7 @@
 			       << "When converting from " << fromcode
 			       << " to " << tocode << ".\n";
 			lyxerr << "Input: " << std::hex;
-			for (size_t i = 0; i < buf.size(); ++i) {
+			for (size_t i = 0; i < buflen; ++i) {
 				unsigned char const b = buf[i];
 				lyxerr << "0x" << int(b) << " ";
 			}
@@ -90,7 +94,7 @@
 			       << "When converting from " << fromcode
 			       << " to " << tocode << ".\n";
 			lyxerr << "Input: " << std::hex;
-			for (size_t i = 0; i < buf.size(); ++i) {
+			for (size_t i = 0; i < buflen; ++i) {
 				unsigned char const b = buf[i];
 				lyxerr << "0x" << int(b) << " ";
 			}
@@ -100,11 +104,12 @@
 			lyxerr << "\tSome other error: " << errno << endl;
 			break;
 		}
-	}
-
-	if (iconv_close(cd) == -1) {
-		lyxerr << "Error returned from iconv_close("
-		       << errno << ")" << endl;
+		// We got an error so we close down the conversion engine
+		if (iconv_close(*cd) == -1) {
+			lyxerr << "Error returned from iconv_close("
+			       << errno << ")" << endl;
+		}
+		*cd = (iconv_t)(-1);
 	}
 
 	//lyxerr << std::dec;
@@ -112,22 +117,8 @@
 	//lyxerr << "Outbytesleft: " << outbytesleft << endl;
 	int bytes = 1000 - outbytesleft;
 
-	std::vector<char> outvec(out, out + bytes);
-	return outvec;
-}
-
-
-std::vector<boost::uint32_t> bytes_to_ucs4(std::vector<char> const & bytes)
-{
-	boost::uint32_t const * tmp = reinterpret_cast<uint32_t const *>(&bytes[0]);
-	return std::vector<boost::uint32_t>(tmp, tmp + bytes.size() / 4);
-}
-
-
-std::vector<unsigned short> bytes_to_ucs2(std::vector<char> const & bytes)
-{
-	unsigned short const * tmp = reinterpret_cast<unsigned short const *>(&bytes[0]);
-	return std::vector<unsigned short>(tmp, tmp + bytes.size() / 2);
+	RetType const * tmp = reinterpret_cast<RetType const *>(out);
+	return std::vector<RetType>(tmp, tmp + bytes / sizeof(RetType));
 }
 
 } // anon namespace
@@ -135,69 +126,55 @@
 
 std::vector<boost::uint32_t> utf8_to_ucs4(std::vector<char> const & utf8str)
 {
-	//lyxerr << "Buff = " << string(utf8str.begin(), utf8str.end())
-	//       << " (" << utf8str.size() << ")" << endl;
-	//lyxerr << "Res = " << string(res.begin(), res.end())
-	//       << " (" << res.size() << ")" << endl;
-
-	std::vector<char> res = iconv_convert(ucs4_codeset, "UTF-8", utf8str);
-	return bytes_to_ucs4(res);
+	static iconv_t cd = (iconv_t)(-1);
+	return iconv_convert<boost::uint32_t>(&cd, ucs4_codeset, "UTF-8",
+					      &utf8str[0], utf8str.size());
 }
 
 
 std::vector<boost::uint32_t>
 ucs2_to_ucs4(std::vector<unsigned short> const & ucs2str)
 {
-	char const * tin = reinterpret_cast<char const *>(&ucs2str[0]);
-	std::vector<char> in(tin, tin + ucs2str.size() * 2);
-	std::vector<char> res = iconv_convert(ucs4_codeset, ucs2_codeset, in);
-	return bytes_to_ucs4(res);
+	static iconv_t cd = (iconv_t)(-1);
+	return iconv_convert<boost::uint32_t>(&cd, ucs4_codeset, ucs2_codeset,
+					      &ucs2str[0], ucs2str.size());
 }
 
 
 std::vector<unsigned short>
 ucs4_to_ucs2(std::vector<boost::uint32_t> const & ucs4str)
 {
-	char const * tin = reinterpret_cast<char const *>(&ucs4str[0]);
-	std::vector<char> in(tin, tin + ucs4str.size() * 4);
-	std::vector<char> res = iconv_convert(ucs2_codeset, ucs4_codeset, in);
-	return bytes_to_ucs2(res);
+	return ucs4_to_ucs2(&ucs4str[0], ucs4str.size());
 }
 
 
 std::vector<unsigned short>
 ucs4_to_ucs2(boost::uint32_t const * s, size_t ls)
 {
-	char const * tin = reinterpret_cast<char const *>(s);
-	std::vector<char> in(tin, tin + ls * 4);
-	std::vector<char> res = iconv_convert(ucs2_codeset, ucs4_codeset, in);
-	return bytes_to_ucs2(res);
+	static iconv_t cd = (iconv_t)(-1);
+	return iconv_convert<unsigned short>(&cd, ucs2_codeset, ucs4_codeset,
+					     s, ls);
 }
 
 
 unsigned short
 ucs4_to_ucs2(boost::uint32_t c)
 {
-	char const * tin = reinterpret_cast<char const *>(&c);
-	std::vector<char> in(tin, tin + 4);
-	std::vector<char> res = iconv_convert(ucs2_codeset, ucs4_codeset, in);
-	return bytes_to_ucs2(res)[0];
+	boost::uint32_t tmp[] = { c, 0 };
+	return ucs4_to_ucs2(tmp, 1)[0];
 }
 
 
 std::vector<char> ucs4_to_utf8(std::vector<boost::uint32_t> const & ucs4str)
 {
-	char const * tin = reinterpret_cast<char const *>(&ucs4str[0]);
-	std::vector<char> in(tin, tin + ucs4str.size() * 4);
-	std::vector<char> res = iconv_convert("UTF-8", ucs4_codeset, in);
-	return res;
+	static iconv_t cd = (iconv_t)(-1);
+	return iconv_convert<char>(&cd, "UTF-8", ucs4_codeset,
+				   &ucs4str[0], ucs4str.size());
 }
 
 
 std::vector<char> ucs4_to_utf8(boost::uint32_t c)
 {
-	char const * tin = reinterpret_cast<char const *>(&c);
-	std::vector<char> in(tin, tin + 4);
-	std::vector<char> res = iconv_convert("UTF-8", ucs4_codeset, in);
-	return res;
+	static iconv_t cd = (iconv_t)(-1);
+	return iconv_convert<char>(&cd, "UTF-8", ucs4_codeset, &c, 1);
 }
-- 
        Lgb

Reply via email to