Am Sonntag, 4. Juni 2006 16:08 schrieb Angus Leeming:
> Could you post a patch (or provide a link to the patch if it's huge)?
I am not Lars, but I just checked the branch out yesterday, so here is the patch.

> Mmmmm. There are so few active devs, that I think you have to be very careful
> about introducing breaking changes. People will get frustrated if breakages
> elsewhere prevent them working on the favoured project. (I'm sure you're aware
> of this, so I'll stop making noise on this subject hereafter).
>
> Of course, a real alternative to breaking head would be to essentially freeze
> head and require development to continue in your branch. People could then port
> stuff from the experimental branch back to a stable head.
>
> There seems to be a lot of good stuff going on at the moment; would be a shame
> to slow that down.

We had this discussion already, and the outcome was that as soon as the unicode stuff is at least barely usable it will go in trunk. That may slow down other things, but will speed up the unicode work.

Georg
diff -ruNx .svn -x Makefile.in ../lyx-1.5-clean/src/frontends/font_metrics.h src/frontends/font_metrics.h --- lyx-1.5-clean/src/frontends/font_metrics.h 2006-04-09 09:07:21.000000000 +0200 +++ lyx-1.5-unicode/src/frontends/font_metrics.h 2006-06-03 20:46:21.000000000 +0200 @@ -70,9 +70,12 @@ } /// return the width of the string in the font int width(char const * s, size_t n, LyXFont const & f); + /// return the width of the string in the font + int width(lyx::char_type const * s, size_t n, LyXFont const & f); /// return the width of the char in the font inline int width(lyx::char_type c, LyXFont const & f) { - return width(&c, 1, f); + char tmp[2] = { c, '\0'}; + return width(tmp, 1, f); } /// return the width of the string in the font inline int width(std::string const & s, LyXFont const & f) { diff -ruNx .svn -x Makefile.in ../lyx-1.5-clean/src/frontends/LyXKeySym.h src/frontends/LyXKeySym.h --- lyx-1.5-clean/src/frontends/LyXKeySym.h 2006-04-14 09:25:03.000000000 +0200 +++ lyx-1.5-unicode/src/frontends/LyXKeySym.h 2006-06-03 20:46:21.000000000 +0200 @@ -53,6 +53,12 @@ virtual char getISOEncoded(std::string const & encoding) const = 0; /** + * Return the value of the keysym into the UCS-4 encoding. + * This converts the LyXKeySym to a 32-bit encoded character. + */ + virtual size_t getUCSEncoded() const = 0; + + /** * Return a string describing the KeySym with modifier mod. 
* This should use the native UI format when applicable */ diff -ruNx .svn -x Makefile.in ../lyx-1.5-clean/src/frontends/nullpainter.h src/frontends/nullpainter.h --- lyx-1.5-clean/src/frontends/nullpainter.h 2006-02-10 19:53:18.000000000 +0100 +++ lyx-1.5-unicode/src/frontends/nullpainter.h 2006-06-03 20:46:21.000000000 +0200 @@ -59,7 +59,9 @@ /// void text(int, int, char const *, size_t, LyXFont const &) {} /// - void text(int, int, char, LyXFont const &) {} + void text(int, int, lyx::char_type const *, size_t, LyXFont const &) {} + /// + void text(int, int, lyx::char_type, LyXFont const &) {} /// void rectText(int, int, std::string const &, LyXFont const &, LColor_color, LColor_color) {} diff -ruNx .svn -x Makefile.in ../lyx-1.5-clean/src/frontends/Painter.h src/frontends/Painter.h --- lyx-1.5-clean/src/frontends/Painter.h 2006-04-09 09:07:21.000000000 +0200 +++ lyx-1.5-unicode/src/frontends/Painter.h 2006-06-03 20:46:21.000000000 +0200 @@ -152,6 +152,14 @@ char const * str, size_t l, LyXFont const & f) = 0; + /** + * Draw a string at position x, y (y is the baseline) + * This is just for fast drawing + */ + virtual void text(int x, int y, + lyx::char_type const * str, size_t l, + LyXFont const & f) = 0; + /// draw a char at position x, y (y is the baseline) virtual void text(int x, int y, lyx::char_type c, LyXFont const & f) = 0; diff -ruNx .svn -x Makefile.in ../lyx-1.5-clean/src/frontends/qt3/qfont_metrics.C src/frontends/qt3/qfont_metrics.C --- lyx-1.5-clean/src/frontends/qt3/qfont_metrics.C 2006-04-24 20:27:15.000000000 +0200 +++ lyx-1.5-unicode/src/frontends/qt3/qfont_metrics.C 2006-06-03 20:46:20.000000000 +0200 @@ -18,6 +18,8 @@ #include "language.h" +#include "support/utf8.h" + using lyx::char_type; using std::string; @@ -153,6 +155,45 @@ } +int width(lyx::char_type const * str, size_t const ls, LyXFont const & f) +{ + if (!lyx_gui::use_gui) + return ls; + + QString s; + for (size_t i = 0; i < ls; ++i) { + char c[7] = {0}; + utf8_from_ucs4(str[i], c); + 
s.append(QString::fromUtf8(c)); + } + +#if 0 + if (f.realShape() == LyXFont::SMALLCAPS_SHAPE) + return smallcapswidth(s, ls, f); +#endif + + Encoding const * encoding = fontencoding(f); + QLFontInfo & fi = fontloader.fontinfo(f); + +#if 0 + if (ls == 1) + return fi.width(encoding->ucs(s[0])); + + int w = 0; + for (size_t i = 0; i < ls; ++i) + w += fi.width(encoding->ucs(s[i])); + + return w; +#else + int w = 0; + for (size_t i = 0; i < ls; ++i) + w += fi.width(s.unicode()[i]); + + return w; +#endif +} + + int signedWidth(string const & s, LyXFont const & f) { if (s[0] == '-') diff -ruNx .svn -x Makefile.in ../lyx-1.5-clean/src/frontends/qt3/QLPainter.C src/frontends/qt3/QLPainter.C --- lyx-1.5-clean/src/frontends/qt3/QLPainter.C 2006-04-24 20:27:15.000000000 +0200 +++ lyx-1.5-unicode/src/frontends/qt3/QLPainter.C 2006-06-03 20:46:20.000000000 +0200 @@ -21,6 +21,8 @@ #include "language.h" #include "LColor.h" +#include "support/utf8.h" + #include "frontends/font_metrics.h" #include <qpainter.h> @@ -177,14 +179,53 @@ void QLPainter::text(int x, int y, string const & s, LyXFont const & f) { + lyxerr << "Drawing string" << endl; return text(x, y, s.data(), s.length(), f); } void QLPainter::text(int x, int y, lyx::char_type c, LyXFont const & f) { +#if 0 char s[2] = { c, '\0' }; return text(x, y, s, 1, f); +#else + setPen(f.realColor()); + + Encoding const * encoding = f.language()->encoding(); + if (f.isSymbolFont()) + encoding = encodings.symbol_encoding(); + +#if 0 + QString str; + str.setLength(ls); + for (size_t i = 0; i < ls; ++i) + // Brain-dead MSVC wants at(i) rather than operator[] + str.at(i) = QChar(encoding->ucs(s[i])); + // HACK: QT3 refuses to show single compose characters + if (ls == 1 && str[0].unicode() >= 0x05b0 && str[0].unicode() <= 0x05c2) + str = ' ' + str; +#else + char s[7] = {0}; + utf8_from_ucs4(c, s); + QString str = QString::fromUtf8(s); + size_t ls = 1; +#endif + + if (f.realShape() != LyXFont::SMALLCAPS_SHAPE) { + 
qp_->setFont(fontloader.get(f)); + // We need to draw the text as LTR as we use our own bidi + // code. + lyxerr << "Drawing single character" << endl; + qp_->drawText(x, y, str, -1, QPainter::LTR); + } else { + smallCapsText(x, y, str, f); + } + + if (f.underbar() == LyXFont::ON) { + underline(f, x, y, font_metrics::width(s, ls, f)); + } +#endif } @@ -220,20 +261,69 @@ void QLPainter::text(int x, int y, char const * s, size_t ls, LyXFont const & f) { + lyxerr << "Drawing char const * s" << endl; + setPen(f.realColor()); + + Encoding const * encoding = f.language()->encoding(); + if (f.isSymbolFont()) + encoding = encodings.symbol_encoding(); + + QString str; + str.setLength(ls); + for (size_t i = 0; i < ls; ++i) + // Brain-dead MSVC wants at(i) rather than operator[] + str.at(i) = QChar(encoding->ucs(s[i])); + // HACK: QT3 refuses to show single compose characters + if (ls == 1 && str[0].unicode() >= 0x05b0 && str[0].unicode() <= 0x05c2) + str = ' ' + str; + + if (f.realShape() != LyXFont::SMALLCAPS_SHAPE) { + qp_->setFont(fontloader.get(f)); + // We need to draw the text as LTR as we use our own bidi + // code. 
+ qp_->drawText(x, y, str, -1, QPainter::LTR); + } else { + smallCapsText(x, y, str, f); + } + + if (f.underbar() == LyXFont::ON) { + underline(f, x, y, font_metrics::width(s, ls, f)); + } +} + +void QLPainter::text(int x, int y, lyx::char_type const * s, size_t ls, + LyXFont const & f) +{ + lyxerr << "Drawing char const * s" << endl; setPen(f.realColor()); +#if 0 Encoding const * encoding = f.language()->encoding(); if (f.isSymbolFont()) encoding = encodings.symbol_encoding(); +#endif + +#if 0 QString str; str.setLength(ls); for (size_t i = 0; i < ls; ++i) // Brain-dead MSVC wants at(i) rather than operator[] str.at(i) = QChar(encoding->ucs(s[i])); +#else + QString str; + for (size_t i = 0; i < ls; ++i) { + char c[7] = {0}; + utf8_from_ucs4(s[i], c); + str.append(QString::fromUtf8(c)); + } +#endif + +#if 0 // HACK: QT3 refuses to show single compose characters if (ls == 1 && str[0].unicode() >= 0x05b0 && str[0].unicode() <= 0x05c2) str = ' ' + str; +#endif if (f.realShape() != LyXFont::SMALLCAPS_SHAPE) { qp_->setFont(fontloader.get(f)); diff -ruNx .svn -x Makefile.in ../lyx-1.5-clean/src/frontends/qt3/QLPainter.h src/frontends/qt3/QLPainter.h --- lyx-1.5-clean/src/frontends/qt3/QLPainter.h 2006-04-24 20:27:15.000000000 +0200 +++ lyx-1.5-unicode/src/frontends/qt3/QLPainter.h 2006-06-03 20:46:20.000000000 +0200 @@ -101,18 +101,25 @@ /// draw a string at position x, y (y is the baseline) virtual void text(int x, int y, - std::string const & str, LyXFont const & f); + std::string const & str, LyXFont const & f); /** Draw a string at position x, y (y is the baseline) * This is just for fast drawing */ virtual void text(int x, int y, - char const * str, size_t l, - LyXFont const & f); + char const * str, size_t l, + LyXFont const & f); + + /** Draw a string at position x, y (y is the baseline) + * This is just for fast drawing + */ + virtual void text(int x, int y, + lyx::char_type const * str, size_t l, + LyXFont const & f); /// draw a char at position x, y (y is the 
baseline) virtual void text(int x, int y, - char c, LyXFont const & f); + lyx::char_type c, LyXFont const & f); private: /// draw small caps text void smallCapsText(int x, int y, diff -ruNx .svn -x Makefile.in ../lyx-1.5-clean/src/frontends/qt3/QLyXKeySym.C src/frontends/qt3/QLyXKeySym.C --- lyx-1.5-clean/src/frontends/qt3/QLyXKeySym.C 2006-04-24 20:27:15.000000000 +0200 +++ lyx-1.5-unicode/src/frontends/qt3/QLyXKeySym.C 2006-06-03 20:46:20.000000000 +0200 @@ -24,6 +24,7 @@ #include <map> #include "support/lstrings.h" #include "support/environment.h" +#include "support/utf8.h" #include "encoding.h" #include "language.h" @@ -75,6 +76,7 @@ void initEncodings() { +#if 0 //const char * c = QTextCodec::locale(); //string s = c ? c : ""; // In this order, see support/filetools.C @@ -131,7 +133,7 @@ encoding_map[""] = defaultCodec; QTextCodec::setCodecForCStrings(defaultCodec); - +#endif } @@ -206,6 +208,17 @@ } +size_t QLyXKeySym::getUCSEncoded() const +{ + QCString tmp = text_.utf8(); + lyxerr << "Data is " << tmp << endl; + lyxerr << "Length is " << tmp.length() << endl; + size_t res = utf8_to_ucs4(tmp, tmp.length()); + lyxerr << "Res is " << res << endl; + return res; +} + + QString const QLyXKeySym::qprint(key_modifier::state mod) const { int tmpkey = key_; diff -ruNx .svn -x Makefile.in ../lyx-1.5-clean/src/frontends/qt3/QLyXKeySym.h src/frontends/qt3/QLyXKeySym.h --- lyx-1.5-clean/src/frontends/qt3/QLyXKeySym.h 2006-04-24 20:27:15.000000000 +0200 +++ lyx-1.5-unicode/src/frontends/qt3/QLyXKeySym.h 2006-06-03 20:46:20.000000000 +0200 @@ -55,6 +55,12 @@ */ virtual char getISOEncoded(std::string const & encoding) const; + /** + * Return the value of the keysym into the UCS-4 encoding. + * This converts the LyXKeySym to a 32-bit encoded character. + */ + virtual size_t getUCSEncoded() const; + /// Return a human-readable version of a key+modifier pair. 
virtual std::string const print(key_modifier::state mod) const; diff -ruNx .svn -x Makefile.in ../lyx-1.5-clean/src/lyxfunc.C src/lyxfunc.C --- lyx-1.5-clean/src/lyxfunc.C 2006-05-21 18:58:14.000000000 +0200 +++ lyx-1.5-unicode/src/lyxfunc.C 2006-06-03 20:46:22.000000000 +0200 @@ -92,6 +92,7 @@ #include "support/systemcall.h" #include "support/convert.h" #include "support/os.h" +#include "support/utf8.h" #include <boost/current_function.hpp> #include <boost/filesystem/operations.hpp> @@ -248,7 +249,8 @@ Encoding const * encoding = view()->cursor().getEncoding(); - encoded_last_key = keysym->getISOEncoded(encoding ? encoding->name() : ""); + //encoded_last_key = keysym->getISOEncoded(encoding ? encoding->name() : ""); + size_t encoded_last_key = keysym->getUCSEncoded(); // Do a one-deep top-level lookup for // cancel and meta-fake keys. RVDK_PATCH_5 @@ -321,7 +323,10 @@ if (func.action == LFUN_SELF_INSERT) { if (encoded_last_key != 0) { - string const arg(1, encoded_last_key); + char tmp[7] = {0}; + int l = utf8_from_ucs4(encoded_last_key, tmp); + //string const arg(1, encoded_last_key); + string const arg(tmp); dispatch(FuncRequest(LFUN_SELF_INSERT, arg, FuncRequest::KEYBOARD)); lyxerr[Debug::KEY] @@ -724,7 +729,7 @@ dispatch_buffer.erase(); // redraw the screen at the end (first of the two drawing steps). - //This is done unless explicitely requested otherwise + //This is done unless explicitely requested otherwise bool update = true; // also do the second redrawing step. Only done if requested. 
bool updateforce = false; diff -ruNx .svn -x Makefile.in ../lyx-1.5-clean/src/lyxfunc.h src/lyxfunc.h --- lyx-1.5-clean/src/lyxfunc.h 2006-04-14 09:25:04.000000000 +0200 +++ lyx-1.5-unicode/src/lyxfunc.h 2006-06-03 20:46:22.000000000 +0200 @@ -18,6 +18,8 @@ #include "kbsequence.h" #include "lfuns.h" +#include "support/types.h" + #include <boost/shared_ptr.hpp> #include <boost/signals/trackable.hpp> @@ -75,7 +77,7 @@ LyXView * owner; /// the last character added to the key sequence, in ISO encoded form - char encoded_last_key; + lyx::char_type encoded_last_key; /// kb_sequence keyseq; diff -ruNx .svn -x Makefile.in ../lyx-1.5-clean/src/lyxtext.h src/lyxtext.h --- lyx-1.5-clean/src/lyxtext.h 2006-04-09 09:07:22.000000000 +0200 +++ lyx-1.5-unicode/src/lyxtext.h 2006-06-03 20:46:22.000000000 +0200 @@ -50,6 +50,8 @@ /// typedef lyx::pos_type pos_type; /// + typedef lyx::char_type char_type; + /// typedef lyx::pit_type pit_type; /// constructor @@ -274,7 +276,7 @@ int singleWidth(Paragraph const & par, pos_type pos) const; /// int singleWidth(Paragraph const & par, - pos_type pos, char c, LyXFont const & Font) const; + pos_type pos, char_type c, LyXFont const & Font) const; /// return the color of the canvas LColor_color backgroundColor() const; diff -ruNx .svn -x Makefile.in ../lyx-1.5-clean/src/paragraph.C src/paragraph.C --- lyx-1.5-clean/src/paragraph.C 2006-05-10 21:51:56.000000000 +0200 +++ lyx-1.5-unicode/src/paragraph.C 2006-06-03 20:46:22.000000000 +0200 @@ -44,6 +44,7 @@ #include "support/lstrings.h" #include "support/textutils.h" #include "support/convert.h" +#include "support/utf8.h" #include <boost/tuple/tuple.hpp> #include <boost/bind.hpp> @@ -54,6 +55,7 @@ #include <sstream> using lyx::pos_type; +using lyx::char_type; using lyx::support::subst; @@ -212,9 +214,12 @@ } // this check is to amend a bug. LyX sometimes // inserts '\0' this could cause problems. 
- if (c != '\0') - os << c; - else + if (c != '\0') { + char tmp[7] = {0}; + lyxerr << "C is " << c << endl; + utf8_from_ucs4(c, tmp); + os << tmp; + } else lyxerr << "ERROR (Paragraph::writeFile):" " NULL char in structure." << endl; ++column; @@ -1801,7 +1806,7 @@ } -unsigned char Paragraph::transformChar(unsigned char c, pos_type pos) const +char_type Paragraph::transformChar(char_type c, pos_type pos) const { if (!Encodings::is_arabic(c)) if (lyxrc.font_norm_type == LyXRC::ISO_8859_6_8 && isDigit(c)) diff -ruNx .svn -x Makefile.in ../lyx-1.5-clean/src/paragraph.h src/paragraph.h --- lyx-1.5-clean/src/paragraph.h 2006-05-10 21:51:56.000000000 +0200 +++ lyx-1.5-unicode/src/paragraph.h 2006-06-03 20:46:22.000000000 +0200 @@ -69,8 +69,8 @@ /// (returning 0) - if this was 0, then we'd /// try getInset() and crash. We should fix /// all these places. - META_INSET = 1 - //META_INSET = 0x200001 // above 0x10ffff, for ucs-4 + //META_INSET = 1 // as in trunk + META_INSET = 0x200001 // above 0x10ffff, for ucs-4 }; enum ChangeTracking { @@ -376,7 +376,7 @@ /// return true if we allow this par to stay empty bool allowEmpty() const; /// - unsigned char transformChar(unsigned char c, lyx::pos_type pos) const; + lyx::char_type transformChar(lyx::char_type c, lyx::pos_type pos) const; /// ParagraphParameters & params(); /// diff -ruNx .svn -x Makefile.in ../lyx-1.5-clean/src/rowpainter.C src/rowpainter.C --- lyx-1.5-clean/src/rowpainter.C 2006-05-10 21:51:56.000000000 +0200 +++ lyx-1.5-unicode/src/rowpainter.C 2006-06-03 20:46:22.000000000 +0200 @@ -253,10 +253,15 @@ Change::Type const prev_change = par_.lookupChange(pos).type; // first character +#if 0 string str; str += par_.getChar(pos); +#else + std::vector<char_type> str; + str.push_back(par_.getChar(pos)); +#endif if (arabic) { - unsigned char c = str[0]; + char_type c = str[0]; str[0] = par_.transformChar(c, pos); } @@ -283,7 +288,11 @@ if (arabic) c = par_.transformChar(c, pos); +#if 0 str += c; +#else + 
str.push_back(c); +#endif } if (prev_change == Change::DELETED) @@ -293,8 +302,13 @@ // Draw text and set the new x position //lyxerr << "paint row: yo_ " << yo_ << "\n"; +#if 0 pain_.text(int(x_), yo_, str, font); x_ += font_metrics::width(str, font); +#else + pain_.text(int(x_), yo_, &str[0], str.size(), font); + x_ += font_metrics::width(&str[0], str.size(), font); +#endif } diff -ruNx .svn -x Makefile.in ../lyx-1.5-clean/src/support/Makefile.am src/support/Makefile.am --- lyx-1.5-clean/src/support/Makefile.am 2006-05-18 20:15:10.000000000 +0200 +++ lyx-1.5-unicode/src/support/Makefile.am 2006-06-03 20:46:21.000000000 +0200 @@ -74,7 +74,9 @@ types.h \ userinfo.C \ userinfo.h \ - unlink.C + unlink.C \ + utf8.C \ + utf8.h package.C: build_package diff -ruNx .svn -x Makefile.in ../lyx-1.5-clean/src/support/types.h src/support/types.h --- lyx-1.5-clean/src/support/types.h 2006-04-09 09:07:21.000000000 +0200 +++ lyx-1.5-unicode/src/support/types.h 2006-06-03 20:46:21.000000000 +0200 @@ -16,15 +16,16 @@ #ifndef LYX_TYPES_H #define LYX_TYPES_H +#include <boost/cstdint.hpp> + #include <cstddef> namespace lyx { // The type used to hold characters in paragraphs - //typedef uint32_t char_type; // Possibly the ucs-4 type we will use + typedef boost::uint32_t char_type; // Possibly the ucs-4 type we will use //typedef wchar_t char_type; // The wide char type CJK-LyX uses - typedef char char_type; // Current narrow char type in use - + //typedef char char_type; // Current narrow char type in use /// a type for positions used in paragraphs // needs to be signed for a while to hold the special value -1 that is diff -ruNx .svn -x Makefile.in ../lyx-1.5-clean/src/support/utf8.C src/support/utf8.C --- lyx-1.5-clean/src/support/utf8.C 1970-01-01 01:00:00.000000000 +0100 +++ lyx-1.5-unicode/src/support/utf8.C 2006-06-03 20:46:21.000000000 +0200 @@ -0,0 +1,123 @@ +/* + * This file is part of NetSurf, http://netsurf.sourceforge.net/ + * Licensed under the GNU General Public License, + 
* http://www.opensource.org/licenses/gpl-license + * Copyright 2005 John M Bell <[EMAIL PROTECTED]> + */ + +/** \file + * UTF-8 manipulation functions (implementation). + */ + +/* http://netsurf.strcprstskrzkrk.co.uk/codedocs/utf8_8c-source.html */ + +#include "utf8.h" + +#include <cassert> + + /** + * Convert a UTF-8 multibyte sequence into a single UCS4 character + * + * Encoding of UCS values outside the UTF-16 plane has been removed from + * RFC3629. This function conforms to RFC2279, however. + * + * \param s The sequence to process + * \param l Length of sequence + * \return UCS4 character + */ +size_t utf8_to_ucs4(const char *str, size_t l) +{ + size_t c = 0; + unsigned char const * s = reinterpret_cast<unsigned char const *>(str); + + if (!s) + assert(0); + else if (l > 0 && *s < 0x80) + c = *s; + else if (l > 1 && (*s & 0xE0) == 0xC0 && (*(s+1) & 0xC0) == 0x80) + c = ((*s & 0x1F) << 6) | (*(s+1) & 0x3F); + else if (l > 2 && (*s & 0xF0) == 0xE0 && (*(s+1) & 0xC0) == 0x80 && + (*(s+2) & 0xC0) == 0x80) + c = ((*s & 0x0F) << 12) | ((*(s+1) & 0x3F) << 6) | + (*(s+2) & 0x3F); + else if (l > 3 && (*s & 0xF8) == 0xF0 && (*(s+1) & 0xC0) == 0x80 && + (*(s+2) & 0xC0) == 0x80 && (*(s+3) & 0xC0) == 0x80) + c = ((*s & 0x0F) << 18) | ((*(s+1) & 0x3F) << 12) | + ((*(s+2) & 0x3F) << 6) | (*(s+3) & 0x3F); + else if (l > 4 && (*s & 0xFC) == 0xF8 && (*(s+1) & 0xC0) == 0x80 && + (*(s+2) & 0xC0) == 0x80 && (*(s+3) & 0xC0) == 0x80 && + (*(s+4) & 0xC0) == 0x80) + c = ((*s & 0x0F) << 24) | ((*(s+1) & 0x3F) << 18) | + ((*(s+2) & 0x3F) << 12) | ((*(s+3) & 0x3F) << 6) | + (*(s+4) & 0x3F); + else if (l > 5 && (*s & 0xFE) == 0xFC && (*(s+1) & 0xC0) == 0x80 && + (*(s+2) & 0xC0) == 0x80 && (*(s+3) & 0xC0) == 0x80 && + (*(s+4) & 0xC0) == 0x80 && (*(s+5) & 0xC0) == 0x80) + c = ((*s & 0x0F) << 28) | ((*(s+1) & 0x3F) << 24) | + ((*(s+2) & 0x3F) << 18) | ((*(s+3) & 0x3F) << 12) | + ((*(s+4) & 0x3F) << 6) | (*(s+5) & 0x3F); + else + assert(0); + + return c; +} + + +/** + * Convert a single 
UCS4 character into a UTF-8 multibyte sequence + * + * Encoding of UCS values outside the UTF-16 plane has been removed from + * RFC3629. This function conforms to RFC2279, however. + * + * \param c The character to process (0 <= c <= 0x7FFFFFFF) + * \param s Pointer to 6 byte long output buffer + * \return Length of multibyte sequence + */ +size_t utf8_from_ucs4(size_t c, char *s) +{ + size_t l = 0; + + if (c > 0x7FFFFFFF || s == NULL) + assert(0); + else if (c < 0x80) { + *s = (char)c; + l = 1; + } + else if (c < 0x800) { + *s = 0xC0 | ((c >> 6) & 0x1F); + *(s+1) = 0x80 | (c & 0x3F); + l = 2; + } + else if (c < 0x10000) { + *s = 0xE0 | ((c >> 12) & 0xF); + *(s+1) = 0x80 | ((c >> 6) & 0x3F); + *(s+2) = 0x80 | (c & 0x3F); + l = 3; + } + else if (c < 0x200000) { + *s = 0xF0 | ((c >> 18) & 0x7); + *(s+1) = 0x80 | ((c >> 12) & 0x3F); + *(s+2) = 0x80 | ((c >> 6) & 0x3F); + *(s+3) = 0x80 | (c & 0x3F); + l = 4; + } + else if (c < 0x4000000) { + *s = 0xF8 | ((c >> 24) & 0x3); + *(s+1) = 0x80 | ((c >> 18) & 0x3F); + *(s+2) = 0x80 | ((c >> 12) & 0x3F); + *(s+3) = 0x80 | ((c >> 6) & 0x3F); + *(s+4) = 0x80 | (c & 0x3F); + l = 5; + } + else if (c <= 0x7FFFFFFF) { + *s = 0xFC | ((c >> 30) & 0x1); + *(s+1) = 0x80 | ((c >> 24) & 0x3F); + *(s+2) = 0x80 | ((c >> 18) & 0x3F); + *(s+3) = 0x80 | ((c >> 12) & 0x3F); + *(s+4) = 0x80 | ((c >> 6) & 0x3F); + *(s+5) = 0x80 | (c & 0x3F); + l = 6; + } + + return l; +} diff -ruNx .svn -x Makefile.in ../lyx-1.5-clean/src/support/utf8.h src/support/utf8.h --- lyx-1.5-clean/src/support/utf8.h 1970-01-01 01:00:00.000000000 +0100 +++ lyx-1.5-unicode/src/support/utf8.h 2006-06-03 20:46:21.000000000 +0200 @@ -0,0 +1,22 @@ +// -*- C++ -*- +/** + * \file lstrings.h + * This file is part of LyX, the document processor. + * + * Licensed under the GNU General Public License, + * http://www.opensource.org/licenses/gpl-license + * Copyright 2005 John M Bell <[EMAIL PROTECTED]> + * + * UTF-8 manipulation functions (implementation). 
+ * http://netsurf.strcprstskrzkrk.co.uk/codedocs/utf8_8c-source.html + */ + +#ifndef LYX_SUPPORT_UTF8_H +#define LYX_SUPPORT_UTF8_H + +#include <cstddef> + +size_t utf8_to_ucs4(const char *s, size_t l); +size_t utf8_from_ucs4(size_t c, char *s); + +#endif diff -ruNx .svn -x Makefile.in ../lyx-1.5-clean/src/text3.C src/text3.C --- lyx-1.5-clean/src/text3.C 2006-05-10 21:51:56.000000000 +0200 +++ lyx-1.5-unicode/src/text3.C 2006-06-03 20:46:22.000000000 +0200 @@ -60,6 +60,7 @@ #include "support/lyxlib.h" #include "support/convert.h" #include "support/lyxtime.h" +#include "support/utf8.h" #include "mathed/math_hullinset.h" #include "mathed/math_macrotemplate.h" @@ -69,6 +70,7 @@ #include <clocale> #include <sstream> +using lyx::char_type; using lyx::pos_type; using lyx::cap::copySelection; @@ -1159,11 +1161,17 @@ cur.clearSelection(); LyXFont const old_font = real_current_font; +#if 0 string::const_iterator cit = cmd.argument.begin(); string::const_iterator end = cmd.argument.end(); for (; cit != end; ++cit) bv->owner()->getIntl().getTransManager(). translateAndInsert(*cit, this); +#else + lyx::char_type c + = utf8_to_ucs4(cmd.argument.data(), cmd.argument.size()); + insertChar(bv->cursor(), c); +#endif cur.resetAnchor(); moveCursor(cur, false);