Hi, it's simple - I hope :)
> So all I have to do is replace char* StringMgr::upperUTF8(char* t, > const unsigned int maxlen)? Reimplement upperUTF8 and then tell Sword to use an instance of your reimplementation to handle Unicode/Latin1 strings with StringMgr::setSystemStringMgr. > If so, then what is maxlen for? Is it expecting t to be overwritten or > just a maximum buffer allocated? I assume that sword will dealloc any > buffer I return. If maxlen is > 0, only maxlen chars are uppercased. It expects t to be overwritten. No buffers are allocated within upperUTF8. I attached the BTStringMgr we use in BibleTime. I advise checking whether a string contains Unicode chars before uppercasing it via Unicode; checking first is a lot faster than always taking the Unicode path. I hope that helps. And yes, we need better documentation :) If you return true in supportsUnicode then LocaleMgr will only load locales which are in UTF-8, so you can be sure that all verse keys are in UTF-8. Let me know if you need help, Joachim -- <>< Re: deemed!
// // C++ Implementation: btstringmgr // // Description: // // // Author: The BibleTime team <[EMAIL PROTECTED]>, (C) 2004 // // Copyright: See COPYING file that comes with this distribution // // #include "btstringmgr.h" //System includes #include <ctype.h> char* BTStringMgr::upperUTF8(char* text, const unsigned int maxlen) { const int max = (maxlen>0) ? maxlen : strlen(text); if (isUtf8(text)) { strncpy(text, (const char*)QString::fromUtf8(text).upper().utf8(), max); return text; } else { char* ret = text; while (*text) { *text = toupper(*text); text++; } return ret; } return text; } char* BTStringMgr::upperLatin1(char* text) { char* ret = text; while (*text) { *text++ = toupper(*text); } return ret; } const bool BTStringMgr::supportsUnicode() const { return true; } const bool BTStringMgr::isUtf8(const char *buf) { int i, n; register unsigned char c; bool gotone = false; #define F 0 /* character never appears in text */ #define T 1 /* character appears in plain ASCII text */ #define I 2 /* character appears in ISO-8859 text */ #define X 3 /* character appears in non-ISO extended ASCII (Mac, IBM PC) */ static const unsigned char text_chars[256] = { /* BEL BS HT LF FF CR */ F, F, F, F, F, F, F, T, T, T, T, F, T, T, F, F, /* 0x0X */ /* ESC */ F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F, /* 0x1X */ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x2X */ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x3X */ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x4X */ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x5X */ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x6X */ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F, /* 0x7X */ /* NEL */ X, X, X, X, X, T, X, X, X, X, X, X, X, X, X, X, /* 0x8X */ X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, /* 0x9X */ I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xaX */ I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xbX */ I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xcX */ I, I, I, I, I, I, 
I, I, I, I, I, I, I, I, I, I, /* 0xdX */ I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xeX */ I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I /* 0xfX */ }; /* *ulen = 0; */ for (i = 0; (c = buf[i]); i++) { if ((c & 0x80) == 0) { /* 0xxxxxxx is plain ASCII */ /* * Even if the whole file is valid UTF-8 sequences, * still reject it if it uses weird control characters. */ if (text_chars[c] != T) return false; } else if ((c & 0x40) == 0) { /* 10xxxxxx never 1st byte */ return false; } else { /* 11xxxxxx begins UTF-8 */ int following; if ((c & 0x20) == 0) { /* 110xxxxx */ following = 1; } else if ((c & 0x10) == 0) { /* 1110xxxx */ following = 2; } else if ((c & 0x08) == 0) { /* 11110xxx */ following = 3; } else if ((c & 0x04) == 0) { /* 111110xx */ following = 4; } else if ((c & 0x02) == 0) { /* 1111110x */ following = 5; } else return false; for (n = 0; n < following; n++) { i++; if (!(c = buf[i])) goto done; if ((c & 0x80) == 0 || (c & 0x40)) return false; } gotone = true; } } done: return gotone; /* don't claim it's UTF-8 if it's all 7-bit */ } #undef F #undef T #undef I #undef X
// // C++ Interface: btstringmgr // // Description: // // // Author: The BibleTime team <[EMAIL PROTECTED]>, (C) 2004 // // Copyright: See COPYING file that comes with this distribution // // #ifndef BTSTRINGMGR_H #define BTSTRINGMGR_H //Sword includes #include <stringmgr.h> //Qt includes #include <qstring.h> using namespace sword; class BTStringMgr : public StringMgr { public: /** Converts the param to an upper case Utf8 string * @param The text encoded in utf8 which should be turned into an upper case string */ virtual char* upperUTF8(char*, const unsigned int maxlen = 0); /** Converts the param to an uppercase latin1 string * @param The text encoded in latin1 which should be turned into an upper case string */ virtual char* upperLatin1(char*); protected: virtual const bool supportsUnicode() const; /** CODE TAKEN FROM KDELIBS 3.2 * This function checks whether a string is utf8 or not. * * It was taken from kdelibs so we do not depend on KDE 3.2. */ const bool isUtf8(const char *buf); }; #endif
pgp0cAaMq14LZ.pgp
Description: PGP signature
_______________________________________________ sword-devel mailing list [EMAIL PROTECTED] http://www.crosswire.org/mailman/listinfo/sword-devel