Am Sonntag, 25. Februar 2007 16:06 schrieb Enrico Forestieri: > On Sun, Feb 25, 2007 at 10:31:16AM +0100, Georg Baum wrote: > > > Here is the patch, please test. > > With this patch, LyX asserts when loading some documents: > > assertion "ucs4 < 65536" failed: file "../../../src/support/../support/qstring_helpers.h", line 74
Because of META_INSET. I did not test a document with insets. This version follows Jean-Marc's idea more closely and assumes some defaults for non-utf16 characters. This morning I was in a hurry and forgot to do that. I will put this patch in tomorrow unless I get objections. > > It fixes bug 3270 for me (did not test > > 1247), but I am still not able to input an é via dead keys if LANG=C. > > I don't know whether this is a LyX problem. On Windows one can input > characters through the Alt+charcode method, Solaris has a Compose key > and characters can be inputed through Compose+<two-key-combination>. > This works whatever the value of LANG. I set up my keyboard with dead keys, and that works with gnome and kde applications and LANG=C. Nedit requires LANG=de_DE, and LyX works with LANG=de_DE and LANG=de_DE.UTF-8, so this looks like a LyX problem to me. Georg
Index: src/support/lstrings.C =================================================================== --- src/support/lstrings.C (Revision 17342) +++ src/support/lstrings.C (Arbeitskopie) @@ -14,6 +14,7 @@ #include "support/lstrings.h" #include "support/lyxlib.h" #include "support/convert.h" +#include "support/qstring_helpers.h" #include "debug.h" @@ -32,17 +33,6 @@ #include <algorithm> #include <sstream> -#ifdef LIBC_WCTYPE_USES_UCS4 -// We can use the libc ctype functions because we unset the LC_CTYPE -// category of the current locale in gettext.C -#include <wctype.h> -#else -// Steal some code from somewhere else, e.g. glib (look at gunicode.h) -// The code that we currently use does not really work. -#endif - - -using lyx::docstring; using std::transform; using std::string; @@ -321,38 +311,15 @@ char uppercase(char c) } -// FIXME UNICODE -// for lowercase() and uppercase() function below when wchar_t is not used: -// 1) std::tolower() and std::toupper() are templates that -// compile fine with char_type. With the test (c >= 256) we -// do not trust these function to do the right thing with -// unicode char. -// 2) these functions use the current locale, which is wrong -// if it is not latin1 based (latin1 is a subset of UCS4). - char_type lowercase(char_type c) { -#ifdef LIBC_WCTYPE_USES_UCS4 - return towlower(c); -#else - if (c >= 256) - return c; - - return tolower(c); -#endif + return qchar_to_ucs4(ucs4_to_qchar(c).toLower()); } char_type uppercase(char_type c) { -#ifdef LIBC_WCTYPE_USES_UCS4 - return towupper(c); -#else - if (c >= 256) - return c; - - return toupper(c); -#endif + return qchar_to_ucs4(ucs4_to_qchar(c).toUpper()); } @@ -361,10 +328,13 @@ namespace { // since we cannot use std::tolower and std::toupper directly in the // calls to std::transform yet, we use these helper clases. 
(Lgb) -template<typename Char> struct local_lowercase { - Char operator()(Char c) const { +struct local_lowercase { + char operator()(char c) const { return tolower(c); } + char_type operator()(char_type c) const { + return qchar_to_ucs4(ucs4_to_qchar(c).toLower()); + } }; struct local_uppercase { @@ -384,7 +354,7 @@ template<typename Char> struct local_asc string const lowercase(string const & a) { string tmp(a); - transform(tmp.begin(), tmp.end(), tmp.begin(), local_lowercase<char>()); + transform(tmp.begin(), tmp.end(), tmp.begin(), local_lowercase()); return tmp; } @@ -392,7 +362,7 @@ string const lowercase(string const & a) docstring const lowercase(docstring const & a) { docstring tmp(a); - transform(tmp.begin(), tmp.end(), tmp.begin(), local_lowercase<char_type>()); + transform(tmp.begin(), tmp.end(), tmp.begin(), local_lowercase()); return tmp; } Index: src/support/qstring_helpers.C =================================================================== --- src/support/qstring_helpers.C (Revision 17342) +++ src/support/qstring_helpers.C (Arbeitskopie) @@ -24,6 +24,7 @@ using std::string; // We use QString::fromUcs4 in Qt 4.2 and higher QString const toqstr(docstring const & str) { + // This does not properly convert surrogate pairs QString s; int i = static_cast<int>(str.size()); s.resize(i); @@ -44,7 +45,7 @@ docstring const qstring_to_ucs4(QString int const ls = qstr.size(); docstring ucs4; for (int i = 0; i < ls; ++i) - ucs4 += static_cast<char_type>(qstr[i].unicode()); + ucs4 += qchar_to_ucs4(qstr[i].unicode()); return ucs4; #endif } Index: src/support/textutils.C =================================================================== --- src/support/textutils.C (Revision 0) +++ src/support/textutils.C (Revision 0) @@ -0,0 +1,75 @@ +// -*- C++ -*- +/** + * \file textutils.C + * This file is part of LyX, the document processor. + * Licence details can be found in the file COPYING. 
+ * + * \author Georg Baum + * + * Full author contact details are available in file CREDITS. + */ + +// FIXME: I can think of a better name for this file ... + +#include "support/textutils.h" +#include "support/qstring_helpers.h" + +namespace lyx { + +namespace { + /// Maximum valid UCS4 code point + char_type const ucs4_max = 0x10ffff; +} + + +bool isLetterChar(char_type c) +{ + if (!is_utf16(c)) { + if (c > ucs4_max) + // outside the UCS4 range + return false; + // assume that all non-utf16 characters are letters + return true; + } + return ucs4_to_qchar(c).isLetter(); +} + + +bool isPrintable(char_type c) +{ + if (!is_utf16(c)) { + if (c > ucs4_max) + // outside the UCS4 range + return false; + // assume that all non-utf16 characters are printable + return true; + } + return ucs4_to_qchar(c).isPrint(); +} + + +bool isPrintableNonspace(char_type c) +{ + if (!is_utf16(c)) { + if (c > ucs4_max) + // outside the UCS4 range + return false; + // assume that all non-utf16 characters are printable and + // no space + return true; + } + QChar const qc = ucs4_to_qchar(c); + return qc.isPrint() && !qc.isSpace(); +} + + +bool isDigit(char_type c) +{ + if (!is_utf16(c)) + // assume that no non-utf16 character is a digit + // c outside the UCS4 range is caught as well + return false; + return ucs4_to_qchar(c).isDigit(); +} + +} // namespace lyx Eigenschaftsänderungen: src/support/textutils.C ___________________________________________________________________ Name: svn:eol-style + native Index: src/support/lstrings.h =================================================================== --- src/support/lstrings.h (Revision 17342) +++ src/support/lstrings.h (Arbeitskopie) @@ -24,17 +24,22 @@ namespace lyx { namespace support { -/// +/// Compare \p s and \p s2, ignoring the case. +/// Caution: Depends on the locale int compare_no_case(std::string const & s, std::string const & s2); + +/// Compare \p s and \p s2, ignoring the case. +/// Does not depend on the locale. 
int compare_no_case(docstring const & s, docstring const & s2); -/// +/// Compare \p s and \p s2, ignoring the case of ASCII characters only. int compare_ascii_no_case(std::string const & s, std::string const & s2); -/// +/// Compare \p s and \p s2, ignoring the case of ASCII characters only. int compare_ascii_no_case(docstring const & s, docstring const & s2); -/// +/// Compare the first \p len characters of \p s and \p s2, ignoring the case. +/// Caution: Depends on the locale int compare_no_case(std::string const & s, std::string const & s2, unsigned int len); /// @@ -75,28 +80,37 @@ int hexToInt(lyx::docstring const & str) /// is \p str pure ascii? bool isAscii(docstring const & str); -/// +/// Changes the case of \p c to lowercase. +/// Caution: Depends on the locale char lowercase(char c); -/// +/// Changes the case of \p c to uppercase. +/// Caution: Depends on the locale char uppercase(char c); -/// changes the case only if c is a one-byte char +/// Changes the case of \p c to lowercase. +/// Does not depend on the locale. char_type lowercase(char_type c); -/// changes the case only if c is a one-byte char +/// Changes the case of \p c to uppercase. +/// Does not depend on the locale. char_type uppercase(char_type c); /// same as lowercase(), but ignores locale std::string const ascii_lowercase(std::string const &); docstring const ascii_lowercase(docstring const &); -/// -std::string const lowercase(std::string const &); -docstring const lowercase(docstring const &); - -/// -std::string const uppercase(std::string const &); +/// Changes the case of \p s to lowercase. +/// Caution: Depends on the locale +std::string const lowercase(std::string const & s); + +/// Changes the case of \p s to lowercase. +/// Does not depend on the locale. +docstring const lowercase(docstring const & s); + +/// Changes the case of \p s to uppercase. 
+/// Caution: Depends on the locale +std::string const uppercase(std::string const & s); /// Does the string start with this prefix? bool prefixIs(docstring const &, char_type); Index: src/support/qstring_helpers.h =================================================================== --- src/support/qstring_helpers.h (Revision 17342) +++ src/support/qstring_helpers.h (Arbeitskopie) @@ -45,6 +45,14 @@ inline QString const toqstr(std::string } +/// Is \p c a valid utf16 char? +inline bool is_utf16(char_type c) +{ + // 0xd800 ... 0xdfff is the range of surrogate pairs. + return c < 0xd800 || (c > 0xdfff && c < 0x10000); +} + + /** * Convert a QChar into a UCS4 character. * This is a hack (it does only make sense for the common part of the UCS4 @@ -54,6 +62,7 @@ inline QString const toqstr(std::string */ inline char_type const qchar_to_ucs4(QChar const & qchar) { + BOOST_ASSERT(is_utf16(static_cast<char_type>(qchar.unicode()))); return static_cast<char_type>(qchar.unicode()); } @@ -71,7 +80,7 @@ inline QChar const ucs4_to_qchar(char_ty // for the ucs2 subrange of unicode. Instead of an assertion we should // return some special characters that indicates that its display is // not supported. - BOOST_ASSERT(ucs4 < 65536); + BOOST_ASSERT(is_utf16(ucs4)); return QChar(static_cast<unsigned short>(ucs4)); } Index: src/support/textutils.h =================================================================== --- src/support/textutils.h (Revision 17342) +++ src/support/textutils.h (Arbeitskopie) @@ -17,15 +17,6 @@ #include "support/types.h" -#ifdef LIBC_WCTYPE_USES_UCS4 -// We can use the libc ctype functions because we unset the LC_CTYPE -// category of the current locale in gettext.C -#include <wctype.h> -#else -// Steal some code from somewhere else, e.g. glib (look at gunicode.h) -// The code that we currently use does not really work. 
-#endif - namespace lyx { @@ -36,61 +27,17 @@ bool isLineSeparatorChar(char_type c) return c == ' '; } - /// return true if a char is alphabetical (including accented chars) -inline -bool isLetterChar(char_type c) -{ -#ifdef LIBC_WCTYPE_USES_UCS4 - return iswalpha(c); -#else - // FIXME UNICODE This is wrong! - return (c >= 'A' && c <= 'Z') - || (c >= 'a' && c <= 'z') - || (c >= 192 && c < 256); // in iso-8859-x these are accented chars -#endif -} - +bool isLetterChar(char_type c); /// return true if the char is printable -inline -bool isPrintable(char_type c) -{ -#ifdef LIBC_WCTYPE_USES_UCS4 - return iswprint(c); -#else - // FIXME UNICODE This is wrong! - return (c & 127) >= ' '; -#endif -} - +bool isPrintable(char_type c); /// return true if the char is printable and not a space -inline -bool isPrintableNonspace(char_type c) -{ -#ifdef LIBC_WCTYPE_USES_UCS4 - return iswprint(c) && !iswspace(c); -#else - // FIXME UNICODE This is wrong! - return (c & 127) > ' '; -#endif -} - +bool isPrintableNonspace(char_type c); /// return true if a unicode char is a digit. -inline -bool isDigit(char_type c) -{ -#ifdef LIBC_WCTYPE_USES_UCS4 - return iswdigit(c); -#else - // FIXME UNICODE This is wrong! - return c >= '0' && c <= '9'; -#endif -} - - +bool isDigit(char_type c); } // namespace lyx Index: src/support/Makefile.am =================================================================== --- src/support/Makefile.am (Revision 17342) +++ src/support/Makefile.am (Arbeitskopie) @@ -78,6 +78,7 @@ libsupport_la_SOURCES = \ systemcall.C \ systemcall.h \ tempname.C \ + textutils.C \ textutils.h \ translator.h \ types.h \ Index: development/scons/scons_manifest.py =================================================================== --- development/scons/scons_manifest.py (Revision 17342) +++ development/scons/scons_manifest.py (Arbeitskopie) @@ -160,6 +160,7 @@ src_support_files = Split(''' socktools.C systemcall.C tempname.C + textutils.C unicode.C unlink.C userinfo.C