What's the status on ICU and StringMgr? I thought that if ICU was compiled into the library, it provided a subclass: ICUStringMgr that worked with UTF8. This would mean that Will doesn't have to do anything if he already includes ICU support.
-Troy.
Joachim Ansorg wrote:
Hi,
it's simple - I hope :)
So all I have to do is replace char* StringMgr::upperUTF8(char* t, const unsigned int maxlen)?
Reimplement upperUTF8 and then tell Sword to use an instance of your reimplementation to handle Unicode/Latin1 Strings with StringMgr::setSystemStringMgr.
If so, then what is maxlen for? Is it expecting t to be overwritten or just a maximum buffer allocated? I assume that sword will dealloc any buffer I return.
If maxlen is > 0, only maxlen chars are upper-cased. It expects t to be overwritten. No buffers are allocated within upperUTF8. I attached the BTStringMgr we use in BibleTime.
I advise checking whether a string contains Unicode chars before upper-casing it via Unicode. Checking first is a lot faster than always taking the Unicode path.
I hope that helps. And yes, we need better documentation :)
If you return true in supportsUnicode then LocaleMgr will only load locales which are in UTF-8, so you can be sure that all verse keys are in UTF-8.
Let me know if you need help, Joachim
------------------------------------------------------------------------
//
// C++ Implementation: btstringmgr
//
// Description: //
//
// Author: The BibleTime team <[EMAIL PROTECTED]>, (C) 2004
//
// Copyright: See COPYING file that comes with this distribution
//
//
#include "btstringmgr.h"

//System includes
#include <ctype.h>
#include <string.h>
/**
 * Copies at most @p limit bytes of the UTF-8 string @p src into @p dst.
 *
 * If @p src is longer than @p limit the copy is shortened to the nearest
 * character boundary, so a multi-byte sequence is never cut in half.
 * A terminating NUL is written whenever fewer than @p limit bytes were
 * copied; when exactly @p limit bytes are copied, dst[limit] is left alone.
 */
static void copyUtf8Bounded(char* dst, const char* src, size_t limit) {
    if (!src) {
        src = "";
    }

    size_t count = strlen(src);
    if (count > limit) {
        count = limit;
        // src[count] is the first byte that will NOT be copied. While it is a
        // continuation byte (10xxxxxx) the cut would split a character.
        while (count > 0 && (static_cast<unsigned char>(src[count]) & 0xC0) == 0x80) {
            --count;
        }
    }

    memcpy(dst, src, count);
    if (count < limit) {
        dst[count] = '\0';
    }
}

/**
 * Upper-cases @p text in place and returns it.
 *
 * Text that isUtf8() recognises as UTF-8 is upper-cased through QString and
 * written back into the caller's buffer. Anything else (isUtf8() also
 * rejects pure 7-bit text) is upper-cased byte by byte with toupper().
 *
 * @param text   NUL-terminated buffer that is overwritten; a null pointer is
 *               returned unchanged.
 * @param maxlen If greater than 0, at most this many bytes of @p text are
 *               rewritten; 0 means "use the current length of the string".
 * @return @p text
 */
char* BTStringMgr::upperUTF8(char* text, const unsigned int maxlen) {
    if (!text) {
        return text;
    }

    const size_t limit = (maxlen > 0) ? maxlen : strlen(text);

    if (isUtf8(text)) {
        // The temporary returned by utf8() lives until the call has finished.
        copyUtf8Bounded(text, (const char*)QString::fromUtf8(text).upper().utf8(), limit);
        return text;
    }

    for (size_t pos = 0; pos < limit && text[pos]; ++pos) {
        // toupper() is only defined for values representable as unsigned char.
        text[pos] = static_cast<char>(toupper(static_cast<unsigned char>(text[pos])));
    }
    return text;
}
/**
 * Upper-cases @p text in place, one byte at a time, using toupper().
 *
 * NOTE(review): toupper() follows the current C locale, so which non-ASCII
 * Latin-1 bytes are changed presumably depends on the locale in effect.
 *
 * @param text NUL-terminated buffer that is overwritten; a null pointer is
 *             returned unchanged.
 * @return @p text
 */
char* BTStringMgr::upperLatin1(char* text) {
    if (!text) {
        return text;
    }

    for (char* cursor = text; *cursor; ++cursor) {
        // Read, convert and store in separate steps: the former
        // "*text++ = toupper(*text)" read and modified text unsequenced.
        // The cast keeps negative char values out of toupper().
        *cursor = static_cast<char>(toupper(static_cast<unsigned char>(*cursor)));
    }
    return text;
}
/**
 * Tells the caller that this string manager can handle Unicode text.
 *
 * @return always true
 */
const bool BTStringMgr::supportsUnicode() const
{
    return true;
}
const bool BTStringMgr::isUtf8(const char *buf) { int i, n; register unsigned char c; bool gotone = false;
#define F 0 /* character never appears in text */ #define T 1 /* character appears in plain ASCII text */ #define I 2 /* character appears in ISO-8859 text */ #define X 3 /* character appears in non-ISO extended ASCII (Mac, IBM PC) */
static const unsigned char text_chars[256] = { /* BEL BS HT LF FF CR */ F, F, F, F, F, F, F, T, T, T, T, F, T, T, F, F, /* 0x0X */ /* ESC */ F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F, /* 0x1X */ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x2X */ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x3X */ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x4X */ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x5X */ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x6X */ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F, /* 0x7X */ /* NEL */ X, X, X, X, X, T, X, X, X, X, X, X, X, X, X, X, /* 0x8X */ X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, /* 0x9X */ I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xaX */ I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xbX */ I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xcX */ I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xdX */ I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xeX */ I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I /* 0xfX */ };
/* *ulen = 0; */ for (i = 0; (c = buf[i]); i++) { if ((c & 0x80) == 0) { /* 0xxxxxxx is plain ASCII */ /* * Even if the whole file is valid UTF-8 sequences, * still reject it if it uses weird control characters. */
if (text_chars[c] != T) return false;
} else if ((c & 0x40) == 0) { /* 10xxxxxx never 1st byte */ return false; } else { /* 11xxxxxx begins UTF-8 */ int following;
if ((c & 0x20) == 0) { /* 110xxxxx */ following = 1; } else if ((c & 0x10) == 0) { /* 1110xxxx */ following = 2; } else if ((c & 0x08) == 0) { /* 11110xxx */ following = 3; } else if ((c & 0x04) == 0) { /* 111110xx */ following = 4; } else if ((c & 0x02) == 0) { /* 1111110x */ following = 5; } else return false;
for (n = 0; n < following; n++) { i++; if (!(c = buf[i])) goto done;
if ((c & 0x80) == 0 || (c & 0x40)) return false; } gotone = true; } } done: return gotone; /* don't claim it's UTF-8 if it's all 7-bit */ }
#undef F #undef T #undef I #undef X
------------------------------------------------------------------------
//
// C++ Interface: btstringmgr
//
// Description: //
//
// Author: The BibleTime team <[EMAIL PROTECTED]>, (C) 2004
//
// Copyright: See COPYING file that comes with this distribution
//
//
#ifndef BTSTRINGMGR_H
#define BTSTRINGMGR_H
//Sword includes #include <stringmgr.h>
//Qt includes #include <qstring.h>
using namespace sword;
/**
 * String manager used by BibleTime: a StringMgr subclass that provides its
 * own upper-casing for UTF-8 and Latin-1 text.
 */
class BTStringMgr : public StringMgr {
public:
    /**
     * Turns a UTF-8 encoded string into upper case.
     * @param text   The UTF-8 encoded text to convert; it is overwritten.
     * @param maxlen Optional length limit; 0 means the current string length
     *               is used.
     * @return The converted text.
     */
    virtual char* upperUTF8(char* text, const unsigned int maxlen = 0);

    /**
     * Turns a Latin-1 encoded string into upper case.
     * @param text The Latin-1 encoded text to convert; it is overwritten.
     * @return The converted text.
     */
    virtual char* upperLatin1(char* text);

protected:
    /** Whether this string manager supports Unicode. */
    virtual const bool supportsUnicode() const;

    /**
     * CODE TAKEN FROM KDELIBS 3.2
     *
     * Checks whether a string is UTF-8 or not. The code was copied from
     * kdelibs so that there is no dependency on KDE 3.2.
     * @param buf The string to examine.
     */
    const bool isUtf8(const char *buf);
};
#endif
------------------------------------------------------------------------
_______________________________________________ sword-devel mailing list [EMAIL PROTECTED] http://www.crosswire.org/mailman/listinfo/sword-devel
_______________________________________________ sword-devel mailing list [EMAIL PROTECTED] http://www.crosswire.org/mailman/listinfo/sword-devel