Am Sonntag, 25. Februar 2007 16:06 schrieb Enrico Forestieri:
> On Sun, Feb 25, 2007 at 10:31:16AM +0100, Georg Baum wrote:
> 
> > Here is the patch, please test.
> 
> With this patch, LyX asserts when loading some documents:
> 
> assertion "ucs4 < 65536" failed:
> file "../../../src/support/../support/qstring_helpers.h", line 74

That is because of META_INSET; I did not test a document with insets. This version 
follows Jean-Marc's idea more closely and assumes some defaults for 
non-utf16 characters. This morning I was in a hurry and forgot to do that.

I will put this patch in tomorrow unless I get objections.

> > It fixes bug 3270 for me (did not test 
> > 1247), but I am still not able to input an é via dead keys if LANG=C.
> 
> I don't know whether this is a LyX problem. On Windows one can input
> characters through the Alt+charcode method, Solaris has a Compose key
> and characters can be input through Compose+<two-key-combination>.
> This works whatever the value of LANG.

I set up my keyboard with dead keys, and that works with GNOME and KDE 
applications and LANG=C. NEdit requires LANG=de_DE, and LyX works with 
LANG=de_DE and LANG=de_DE.UTF-8, so this looks like a LyX problem to me.


Georg
Index: src/support/lstrings.C
===================================================================
--- src/support/lstrings.C	(Revision 17342)
+++ src/support/lstrings.C	(Arbeitskopie)
@@ -14,6 +14,7 @@
 #include "support/lstrings.h"
 #include "support/lyxlib.h"
 #include "support/convert.h"
+#include "support/qstring_helpers.h"
 
 #include "debug.h"
 
@@ -32,17 +33,6 @@
 #include <algorithm>
 #include <sstream>
 
-#ifdef LIBC_WCTYPE_USES_UCS4
-// We can use the libc ctype functions because we unset the LC_CTYPE
-// category of the current locale in gettext.C
-#include <wctype.h>
-#else
-// Steal some code from somewhere else, e.g. glib (look at gunicode.h)
-// The code that we currently use does not really work.
-#endif
-
-
-using lyx::docstring;
 
 using std::transform;
 using std::string;
@@ -321,38 +311,15 @@ char uppercase(char c)
 }
 
 
-// FIXME UNICODE
-// for lowercase() and uppercase() function below when wchar_t is not used:
-// 1) std::tolower() and std::toupper() are templates that
-// compile fine with char_type. With the test (c >= 256) we
-// do not trust these function to do the right thing with
-// unicode char.
-// 2) these functions use the current locale, which is wrong
-// if it is not latin1 based (latin1 is a subset of UCS4).
-
 char_type lowercase(char_type c)
 {
-#ifdef LIBC_WCTYPE_USES_UCS4
-	return towlower(c);
-#else
-	if (c >= 256)
-		return c;
-
-	return tolower(c);
-#endif
+	return qchar_to_ucs4(ucs4_to_qchar(c).toLower());
 }
 
 
 char_type uppercase(char_type c)
 {
-#ifdef LIBC_WCTYPE_USES_UCS4
-	return towupper(c);
-#else
-	if (c >= 256)
-		return c;
-
-	return toupper(c);
-#endif
+	return qchar_to_ucs4(ucs4_to_qchar(c).toUpper());
 }
 
 
@@ -361,10 +328,13 @@ namespace {
 // since we cannot use std::tolower and std::toupper directly in the
 // calls to std::transform yet, we use these helper clases. (Lgb)
 
-template<typename Char> struct local_lowercase {
-	Char operator()(Char c) const {
+struct local_lowercase {
+	char operator()(char c) const {
 		return tolower(c);
 	}
+	char_type operator()(char_type c) const {
+		return qchar_to_ucs4(ucs4_to_qchar(c).toLower());
+	}
 };
 
 struct local_uppercase {
@@ -384,7 +354,7 @@ template<typename Char> struct local_asc
 string const lowercase(string const & a)
 {
 	string tmp(a);
-	transform(tmp.begin(), tmp.end(), tmp.begin(), local_lowercase<char>());
+	transform(tmp.begin(), tmp.end(), tmp.begin(), local_lowercase());
 	return tmp;
 }
 
@@ -392,7 +362,7 @@ string const lowercase(string const & a)
 docstring const lowercase(docstring const & a)
 {
 	docstring tmp(a);
-	transform(tmp.begin(), tmp.end(), tmp.begin(), local_lowercase<char_type>());
+	transform(tmp.begin(), tmp.end(), tmp.begin(), local_lowercase());
 	return tmp;
 }
 
Index: src/support/qstring_helpers.C
===================================================================
--- src/support/qstring_helpers.C	(Revision 17342)
+++ src/support/qstring_helpers.C	(Arbeitskopie)
@@ -24,6 +24,7 @@ using std::string;
 // We use QString::fromUcs4 in Qt 4.2 and higher
 QString const toqstr(docstring const & str)
 {
+	// This does not properly convert surrogate pairs
 	QString s;
 	int i = static_cast<int>(str.size()); 
 	s.resize(i);
@@ -44,7 +45,7 @@ docstring const qstring_to_ucs4(QString 
 	int const ls = qstr.size();
 	docstring ucs4;
 	for (int i = 0; i < ls; ++i)
-		ucs4 += static_cast<char_type>(qstr[i].unicode());
+		ucs4 += qchar_to_ucs4(qstr[i].unicode());
 	return ucs4;
 #endif
 }
Index: src/support/textutils.C
===================================================================
--- src/support/textutils.C	(Revision 0)
+++ src/support/textutils.C	(Revision 0)
@@ -0,0 +1,75 @@
+// -*- C++ -*-
+/**
+ * \file textutils.C
+ * This file is part of LyX, the document processor.
+ * Licence details can be found in the file COPYING.
+ *
+ * \author Georg Baum
+ *
+ * Full author contact details are available in file CREDITS.
+ */
+
+// FIXME: I can think of a better name for this file ...
+
+#include "support/textutils.h"
+#include "support/qstring_helpers.h"
+
+namespace lyx {
+
+namespace {
+	/// Maximum valid UCS4 code point
+	char_type const ucs4_max = 0x10ffff;
+}
+
+
+bool isLetterChar(char_type c)
+{
+	if (!is_utf16(c)) {
+		if (c > ucs4_max)
+			// outside the UCS4 range
+			return false;
+		// assume that all non-utf16 characters are letters
+		return true;
+	}
+	return ucs4_to_qchar(c).isLetter();
+}
+
+
+bool isPrintable(char_type c)
+{
+	if (!is_utf16(c)) {
+		if (c > ucs4_max)
+			// outside the UCS4 range
+			return false;
+		// assume that all non-utf16 characters are printable
+		return true;
+	}
+	return ucs4_to_qchar(c).isPrint();
+}
+
+
+bool isPrintableNonspace(char_type c)
+{
+	if (!is_utf16(c)) {
+		if (c > ucs4_max)
+			// outside the UCS4 range
+			return false;
+		// assume that all non-utf16 characters are printable and
+		// no space
+		return true;
+	}
+	QChar const qc = ucs4_to_qchar(c);
+	return qc.isPrint() && !qc.isSpace();
+}
+
+
+bool isDigit(char_type c)
+{
+	if (!is_utf16(c))
+		// assume that no non-utf16 character is a digit
+		// c outside the UCS4 range is caught as well
+		return false;
+	return ucs4_to_qchar(c).isDigit();
+}
+
+} // namespace lyx

Eigenschaftsänderungen: src/support/textutils.C
___________________________________________________________________
Name: svn:eol-style
   + native

Index: src/support/lstrings.h
===================================================================
--- src/support/lstrings.h	(Revision 17342)
+++ src/support/lstrings.h	(Arbeitskopie)
@@ -24,17 +24,22 @@
 namespace lyx {
 namespace support {
 
-///
+/// Compare \p s and \p s2, ignoring the case.
+/// Caution: Depends on the locale
 int compare_no_case(std::string const & s, std::string const & s2);
+
+/// Compare \p s and \p s2, ignoring the case.
+/// Does not depend on the locale.
 int compare_no_case(docstring const & s, docstring const & s2);
 
-///
+/// Compare \p s and \p s2, ignoring the case of ASCII characters only.
 int compare_ascii_no_case(std::string const & s, std::string const & s2);
 
-///
+/// Compare \p s and \p s2, ignoring the case of ASCII characters only.
 int compare_ascii_no_case(docstring const & s, docstring const & s2);
 
-///
+/// Compare the first \p len characters of \p s and \p s2, ignoring the case.
+/// Caution: Depends on the locale
 int compare_no_case(std::string const & s, std::string const & s2, unsigned int len);
 
 ///
@@ -75,28 +80,37 @@ int hexToInt(lyx::docstring const & str)
 /// is \p str pure ascii?
 bool isAscii(docstring const & str);
 
-///
+/// Changes the case of \p c to lowercase.
+/// Caution: Depends on the locale
 char lowercase(char c);
 
-///
+/// Changes the case of \p c to uppercase.
+/// Caution: Depends on the locale
 char uppercase(char c);
 
-/// changes the case only if c is a one-byte char
+/// Changes the case of \p c to lowercase.
+/// Does not depend on the locale.
 char_type lowercase(char_type c);
 
-/// changes the case only if c is a one-byte char
+/// Changes the case of \p c to uppercase.
+/// Does not depend on the locale.
 char_type uppercase(char_type c);
 
 /// same as lowercase(), but ignores locale
 std::string const ascii_lowercase(std::string const &);
 docstring const ascii_lowercase(docstring const &);
 
-///
-std::string const lowercase(std::string const &);
-docstring const lowercase(docstring const &);
-
-///
-std::string const uppercase(std::string const &);
+/// Changes the case of \p s to lowercase.
+/// Caution: Depends on the locale
+std::string const lowercase(std::string const & s);
+
+/// Changes the case of \p s to lowercase.
+/// Does not depend on the locale.
+docstring const lowercase(docstring const & s);
+
+/// Changes the case of \p s to uppercase.
+/// Caution: Depends on the locale
+std::string const uppercase(std::string const & s);
 
 /// Does the string start with this prefix?
 bool prefixIs(docstring const &, char_type);
Index: src/support/qstring_helpers.h
===================================================================
--- src/support/qstring_helpers.h	(Revision 17342)
+++ src/support/qstring_helpers.h	(Arbeitskopie)
@@ -45,6 +45,14 @@ inline QString const toqstr(std::string 
 }
 
 
+/// Is \p c a valid utf16 char?
+inline bool is_utf16(char_type c)
+{
+	// 0xd800 ... 0xdfff is the range of surrogate pairs.
+	return c < 0xd800 || (c > 0xdfff && c < 0x10000);
+}
+
+
 /**
  * Convert a QChar into a UCS4 character.
  * This is a hack (it does only make sense for the common part of the UCS4
@@ -54,6 +62,7 @@ inline QString const toqstr(std::string 
  */
 inline char_type const qchar_to_ucs4(QChar const & qchar)
 {
+	BOOST_ASSERT(is_utf16(static_cast<char_type>(qchar.unicode())));
 	return static_cast<char_type>(qchar.unicode());
 }
 
@@ -71,7 +80,7 @@ inline QChar const ucs4_to_qchar(char_ty
 	// for the ucs2 subrange of unicode. Instead of an assertion we should
 	// return some special characters that indicates that its display is
 	// not supported.
-	BOOST_ASSERT(ucs4 < 65536);
+	BOOST_ASSERT(is_utf16(ucs4));
 	return QChar(static_cast<unsigned short>(ucs4));
 }
 
Index: src/support/textutils.h
===================================================================
--- src/support/textutils.h	(Revision 17342)
+++ src/support/textutils.h	(Arbeitskopie)
@@ -17,15 +17,6 @@
 
 #include "support/types.h"
 
-#ifdef LIBC_WCTYPE_USES_UCS4
-// We can use the libc ctype functions because we unset the LC_CTYPE
-// category of the current locale in gettext.C
-#include <wctype.h>
-#else
-// Steal some code from somewhere else, e.g. glib (look at gunicode.h)
-// The code that we currently use does not really work.
-#endif
-
 
 namespace lyx {
 
@@ -36,61 +27,17 @@ bool isLineSeparatorChar(char_type c)
 	return c == ' ';
 }
 
-
 /// return true if a char is alphabetical (including accented chars)
-inline
-bool isLetterChar(char_type c)
-{
-#ifdef LIBC_WCTYPE_USES_UCS4
-	return iswalpha(c);
-#else
-	// FIXME UNICODE This is wrong!
-	return (c >= 'A' && c <= 'Z')
-		|| (c >= 'a' && c <= 'z')
-		|| (c >= 192 && c < 256); // in iso-8859-x these are accented chars
-#endif
-}
-
+bool isLetterChar(char_type c);
 
 /// return true if the char is printable
-inline
-bool isPrintable(char_type c)
-{
-#ifdef LIBC_WCTYPE_USES_UCS4
-	return iswprint(c);
-#else
-	// FIXME UNICODE This is wrong!
-	return (c & 127) >= ' ';
-#endif
-}
-
+bool isPrintable(char_type c);
 
 /// return true if the char is printable and not a space
-inline
-bool isPrintableNonspace(char_type c)
-{
-#ifdef LIBC_WCTYPE_USES_UCS4
-	return iswprint(c) && !iswspace(c);
-#else
-	// FIXME UNICODE This is wrong!
-	return (c & 127) > ' ';
-#endif
-}
-
+bool isPrintableNonspace(char_type c);
 
 /// return true if a unicode char is a digit.
-inline
-bool isDigit(char_type c)
-{
-#ifdef LIBC_WCTYPE_USES_UCS4
-	return iswdigit(c);
-#else
-	// FIXME UNICODE This is wrong!
-	return c >= '0' && c <= '9';
-#endif
-}
-
-
+bool isDigit(char_type c);
 
 } // namespace lyx
 
Index: src/support/Makefile.am
===================================================================
--- src/support/Makefile.am	(Revision 17342)
+++ src/support/Makefile.am	(Arbeitskopie)
@@ -78,6 +78,7 @@ libsupport_la_SOURCES = \
 	systemcall.C \
 	systemcall.h \
 	tempname.C \
+	textutils.C \
 	textutils.h \
 	translator.h \
 	types.h \
Index: development/scons/scons_manifest.py
===================================================================
--- development/scons/scons_manifest.py	(Revision 17342)
+++ development/scons/scons_manifest.py	(Arbeitskopie)
@@ -160,6 +160,7 @@ src_support_files = Split('''
     socktools.C
     systemcall.C
     tempname.C
+    textutils.C
     unicode.C
     unlink.C
     userinfo.C

Reply via email to