Abdelrazak Younes wrote: > Abdelrazak Younes wrote: >> If you try out the document attached in bug 3561 >> (http://bugzilla.lyx.org/show_bug.cgi?id=3561) and View->Source, the >> Encoding::init() will take 40 seconds on my system. >> >> With the attached patch, this goes down to 25 seconds.
Your patch (+ the part that is already in) has several problems: - char ucs4_to_eightbit(char_type ucs4, string const & encoding) does not make sense at all: you cannot guarantee that the result is only one char (besides, it is unused). - The name of ucs4_to_multibytes is misleading: this function does exactly the same as ucs4_to_eightbit, only optimized for a single UCS4 char. - Now there are two maps with ucs4 -> 8bit iconv processors; one is enough and more efficient. - ucs4_to_multibytes silently fails for exotic conversions that result in more than 4 bytes. AFAIK LyX currently doesn't support such an encoding, but AFAIK some exist and they could be supported in the future. - There is no reason not to use the optimized map lookup in eightbit_to_ucs4, too. Consider the attached (untested) version if you like. Georg
Index: src/support/unicode.cpp =================================================================== --- src/support/unicode.cpp (Revision 18336) +++ src/support/unicode.cpp (Arbeitskopie) @@ -288,59 +288,54 @@ vector<char_type> eightbit_to_ucs4(char const * s, size_t ls, string const & encoding) { static map<string, IconvProcessor> processors; - if (processors.find(encoding) == processors.end()) { + map<string, IconvProcessor>::iterator it = processors.find(encoding); + if (it == processors.end()) { IconvProcessor processor(ucs4_codeset, encoding.c_str()); - processors.insert(make_pair(encoding, processor)); + it = processors.insert(make_pair(encoding, processor)).first; } - return iconv_convert<char_type>(processors[encoding], s, ls); + return iconv_convert<char_type>(it->second, s, ls); } -vector<char> -ucs4_to_eightbit(char_type const * ucs4str, size_t ls, string const & encoding) +namespace { + +/// processors for UCS4 -> 8bit encoding conversions +map<string, IconvProcessor> ucs4_processors; + + +/// Get processor for UCS4 -> \p encoding conversion +inline IconvProcessor & get_ucs4_processor(string const & encoding) { - static map<string, IconvProcessor> processors; - if (processors.find(encoding) == processors.end()) { + map<string, IconvProcessor>::iterator it = ucs4_processors.find(encoding); + if (it == ucs4_processors.end()) { IconvProcessor processor(encoding.c_str(), ucs4_codeset); - processors.insert(make_pair(encoding, processor)); + return ucs4_processors.insert(make_pair(encoding, processor)).first->second; } - return iconv_convert<char>(processors[encoding], ucs4str, ls); + return it->second; } +} -char ucs4_to_eightbit(char_type ucs4, string const & encoding) -{ - static map<string, IconvProcessor> processors; - map<string, IconvProcessor>::iterator it = processors.find(encoding); - if (it == processors.end()) { - IconvProcessor processor(encoding.c_str(), ucs4_codeset); - it = processors.insert(make_pair(encoding, processor)).first; - } - char 
out; - int const bytes = it->second.convert((char *)(&ucs4), 4, &out, 1); - if (bytes > 0) - return out; - return 0; +vector<char> +ucs4_to_eightbit(char_type const * ucs4str, size_t ls, string const & encoding) +{ + return iconv_convert<char>(get_ucs4_processor(encoding), ucs4str, ls); } -void ucs4_to_multibytes(char_type ucs4, vector<char> & out, +void ucs4_to_eightbit(char_type ucs4, vector<char> & out, string const & encoding) { - static map<string, IconvProcessor> processors; - map<string, IconvProcessor>::iterator it = processors.find(encoding); - if (it == processors.end()) { - IconvProcessor processor(encoding.c_str(), ucs4_codeset); - it = processors.insert(make_pair(encoding, processor)).first; - } out.resize(4); - int bytes = it->second.convert((char *)(&ucs4), 4, &out[0], 4); - if (bytes > 0) + int bytes = get_ucs4_processor(encoding).convert((char *)(&ucs4), 4, &out[0], 4); + if (bytes >= 0) out.resize(bytes); else - out.clear(); + // Use unoptimized version. + // Does only happen for exotic encodings + out = ucs4_to_eightbit(&ucs4, 1, encoding); } } // namespace lyx Index: src/support/unicode.h =================================================================== --- src/support/unicode.h (Revision 18336) +++ src/support/unicode.h (Arbeitskopie) @@ -89,12 +89,9 @@ eightbit_to_ucs4(char const * s, size_t std::vector<char> ucs4_to_eightbit(char_type const * ucs4str, size_t ls, std::string const & encoding); -/// convert ucs4 character \p c to encoding \p encoding. +/// convert ucs4 character \p ucs4 to encoding \p encoding. 
/// \p encoding must be a valid iconv 8bit encoding -char ucs4_to_eightbit(char_type c, std::string const & encoding); - -/// -void ucs4_to_multibytes(char_type ucs4, std::vector<char> & out, +void ucs4_to_eightbit(char_type ucs4, std::vector<char> & out, std::string const & encoding); extern char const * ucs4_codeset; Index: src/Encoding.cpp =================================================================== --- src/Encoding.cpp (Revision 18336) +++ src/Encoding.cpp (Arbeitskopie) @@ -171,8 +171,9 @@ void Encoding::init() const // they do not have a direct representation as a single byte, // therefore we need to check all UCS4 code points. // This is expensive! + std::vector<char> eightbit; for (char_type c = 0; c < max_ucs4; ++c) { - std::vector<char> const eightbit = ucs4_to_eightbit(&c, 1, iconvName_); + ucs4_to_eightbit(c, eightbit, iconvName_); if (!eightbit.empty()) { CharInfoMap::const_iterator const it = unicodesymbols.find(c); if (it == unicodesymbols.end() || !it->second.force)