Re: [sword-devel] MacSword 1.1.3

Troy A. Griffitts Thu, 25 Nov 2004 17:56:50 -0800

Hey guys, What's the status on ICU and StringMgr? I thought that if ICU was compiled into the library, it provided a subclass: ICUStringMgr that worked with UTF8. This would mean that Will doesn't have to do anything if he already includes ICU support.

        -Troy.

Joachim Ansorg wrote:

Hi,

it's simple - I hope :)

So all I have to do is replace char* StringMgr::upperUTF8(char* t,
const unsigned int maxlen)?

Reimplement upperUTF8 and then tell Sword to use an instance of your reimplementation to handle Unicode/Latin1 Strings with StringMgr::setSystemStringMgr.

If so, then what is maxlen for? Is it expecting t to be overwritten or
just a maximum buffer allocated? I assume that sword will dealloc any
buffer I return.

If maxlen is > 0, only the first maxlen chars are uppercased. It expects t to be overwritten. No buffers are allocated within upperUTF8. I attached the BTStringMgr we use in BibleTime. I advise checking whether a string contains Unicode chars before uppercasing it with the Unicode routines; doing that check first is a lot faster than always taking the Unicode path.

I hope that helps. And yes, we need better documentation :)

If you return true in supportsUnicode then LocaleMgr will only load locales which are in UTF-8, so you can be sure that all verse keys are in UTF-8.

Let me know if you need help,
Joachim


------------------------------------------------------------------------

#include "btstringmgr.h"

//System includes
#include <ctype.h>

/**
 * Upper-cases a NUL-terminated string in place.
 *
 * If isUtf8() recognises the text as UTF-8, it is decoded with QString,
 * upper-cased, re-encoded and copied back over the original bytes.
 * Otherwise it is upper-cased byte by byte with toupper().
 *
 * @param text   Buffer to convert; it is overwritten, nothing is allocated.
 * @param maxlen If greater than 0, the maximum number of bytes to rewrite;
 *               0 means the whole string.
 * @return The same pointer that was passed in as text.
 */
char* BTStringMgr::upperUTF8(char* text, const unsigned int maxlen) {
        const size_t len = strlen(text);

        // Never rewrite more bytes than the string currently holds: strncpy()
        // zero-pads up to the count it is given, so a maxlen larger than the
        // string would otherwise write past the existing terminator.
        const size_t limit = (maxlen > 0 && maxlen < len) ? maxlen : len;

        if (isUtf8(text)) {
                // The temporary holding the converted bytes lives until the end
                // of this statement, so reading from it inside strncpy() is safe.
                // The limit is counted in bytes; text[len] (the terminator) is
                // never touched, so the result always stays NUL-terminated.
                // NOTE(review): a byte limit can fall inside a multi-byte
                // character if upper-casing changes the encoded length.
                strncpy(text,
                        static_cast<const char*>(QString::fromUtf8(text).upper().utf8()),
                        limit);
                return text;
        }

        // Not UTF-8: convert byte-wise. toupper() requires a value representable
        // as unsigned char, so go through unsigned char to keep bytes >= 0x80
        // from being passed as negative ints.
        for (size_t i = 0; i < limit; ++i) {
                text[i] = static_cast<char>(toupper(static_cast<unsigned char>(text[i])));
        }

        return text;
}

/**
 * Upper-cases a NUL-terminated single-byte string in place using toupper().
 *
 * Which bytes above 0x7F are changed depends on the current C locale.
 *
 * @param text Buffer to convert; it is overwritten.
 * @return The same pointer that was passed in as text.
 */
char* BTStringMgr::upperLatin1(char* text) {
        for (char* p = text; *p; ++p) {
                // Read, convert and store in separate steps: writing through
                // `*p++` while also reading `*p` in the same expression is
                // unsequenced before C++17. The unsigned char cast keeps bytes
                // >= 0x80 from reaching toupper() as negative ints.
                *p = static_cast<char>(toupper(static_cast<unsigned char>(*p)));
        }

        return text;
}

/**
 * Reports that this string manager can handle Unicode text.
 *
 * NOTE(review): according to the accompanying mailing-list explanation,
 * returning true here makes SWORD's LocaleMgr load only UTF-8 locales;
 * confirm against the SWORD version in use.
 *
 * @return Always true.
 */
const bool BTStringMgr::supportsUnicode() const {
        return true;
}

/**
 * Heuristically decides whether a NUL-terminated byte string is UTF-8 text.
 *
 * Rules, as implemented below:
 *  - 7-bit bytes must be printable ASCII or one of BEL, BS, HT, LF, FF, CR,
 *    ESC; any other control byte (and DEL) makes the string "not UTF-8".
 *  - A byte of the form 10xxxxxx is rejected when it appears where a lead
 *    byte is expected.
 *  - A lead byte announces 1 to 5 continuation bytes (the historical 5- and
 *    6-byte forms are accepted); each continuation must be 10xxxxxx.
 *    Overlong encodings are not detected.
 *  - If the string ends in the middle of a multi-byte sequence, scanning
 *    stops and the result depends only on what was seen before.
 *  - A string consisting only of 7-bit bytes yields false.
 *
 * @param buf NUL-terminated bytes to inspect; must not be null.
 * @return true if at least one complete multi-byte sequence was found and
 *         nothing before the end of the scan violated the rules above.
 */
const bool BTStringMgr::isUtf8(const char *buf) {
  /* Byte classes for the lookup table. A function-local enum is used instead
   * of single-letter macros so the names cannot leak into other code. */
  enum {
    F = 0,  /* character never appears in text */
    T = 1,  /* character appears in plain ASCII text */
    I = 2,  /* character appears in ISO-8859 text */
    X = 3   /* character appears in non-ISO extended ASCII (Mac, IBM PC) */
  };

  static const unsigned char text_chars[256] = {
        /*                  BEL BS HT LF    FF CR    */
        F, F, F, F, F, F, F, T, T, T, T, F, T, T, F, F,  /* 0x0X */
        /*                              ESC          */
        F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F,  /* 0x1X */
        T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x2X */
        T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x3X */
        T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x4X */
        T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x5X */
        T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x6X */
        T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F,  /* 0x7X */
        /*            NEL                            */
        X, X, X, X, X, T, X, X, X, X, X, X, X, X, X, X,  /* 0x8X */
        X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,  /* 0x9X */
        I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xaX */
        I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xbX */
        I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xcX */
        I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xdX */
        I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xeX */
        I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I   /* 0xfX */
  };

  bool gotone = false;  /* becomes true after the first complete multi-byte sequence */

  for (int i = 0; buf[i]; i++) {
    unsigned char c = static_cast<unsigned char>(buf[i]);

    if ((c & 0x80) == 0) {        /* 0xxxxxxx is plain ASCII */
      /*
       * Even if the whole string is valid UTF-8 sequences,
       * still reject it if it uses weird control characters.
       */
      if (text_chars[c] != T)
        return false;
      continue;
    }

    if ((c & 0x40) == 0)          /* 10xxxxxx never 1st byte */
      return false;

    /* 11xxxxxx begins a multi-byte sequence; count the continuation bytes. */
    int following;
    if ((c & 0x20) == 0) {        /* 110xxxxx */
      following = 1;
    } else if ((c & 0x10) == 0) { /* 1110xxxx */
      following = 2;
    } else if ((c & 0x08) == 0) { /* 11110xxx */
      following = 3;
    } else if ((c & 0x04) == 0) { /* 111110xx */
      following = 4;
    } else if ((c & 0x02) == 0) { /* 1111110x */
      following = 5;
    } else {
      return false;
    }

    for (int n = 0; n < following; n++) {
      i++;
      c = static_cast<unsigned char>(buf[i]);

      /* String ended inside a sequence: stop scanning and report what was
       * seen so far (the truncated tail is tolerated, not rejected). */
      if (!c)
        return gotone;

      if ((c & 0x80) == 0 || (c & 0x40))  /* continuation must be 10xxxxxx */
        return false;
    }
    gotone = true;
  }

  return gotone;   /* don't claim it's UTF-8 if it's all 7-bit */
}


------------------------------------------------------------------------

//
// C++ Interface: btstringmgr
//
// Description:
//
//
// Author: The BibleTime team <[EMAIL PROTECTED]>, (C) 2004
//
// Copyright: See COPYING file that comes with this distribution
//
//
#ifndef BTSTRINGMGR_H
#define BTSTRINGMGR_H

//Sword includes
#include <stringmgr.h>

//Qt includes
#include <qstring.h>

using namespace sword;

/**
 * String manager that upper-cases UTF-8 text with the help of QString and
 * falls back to byte-wise toupper() for everything else.
 */
class BTStringMgr : public StringMgr {
public:
        /** Converts the given text to upper case in place.
        * The text is handled as UTF-8 when isUtf8() recognises it as such.
        * @param text The text which should be turned into an upper case string; it is overwritten.
        * @param maxlen If greater than 0, limits the number of bytes copied back
        *        when the text is UTF-8; 0 means the whole string length is used.
        * @return The pointer passed in as text.
        */
        virtual char* upperUTF8(char* text, const unsigned int maxlen = 0);

        /** Converts the given single-byte (latin1) text to upper case in place.
        * @param text The text encoded in latin1 which should be turned into an upper case string; it is overwritten.
        * @return The pointer passed in as text.
        */
        virtual char* upperLatin1(char* text);

protected:
        /** Tells whether this string manager supports Unicode.
        * @return Always true for this implementation.
        */
        virtual const bool supportsUnicode() const;

        /** CODE TAKEN FROM KDELIBS 3.2
        * This function checks whether a string is utf8 or not.
        *
        * It was taken from kdelibs so we do not depend on KDE 3.2.
        * @param buf The NUL-terminated string to check.
        * @return true if the string looks like UTF-8 containing at least one
        *         multi-byte sequence; false otherwise (including pure 7-bit text).
        */
        const bool isUtf8(const char *buf);
};

#endif


------------------------------------------------------------------------

_______________________________________________
sword-devel mailing list
[EMAIL PROTECTED]
http://www.crosswire.org/mailman/listinfo/sword-devel

_______________________________________________
sword-devel mailing list
[EMAIL PROTECTED]
http://www.crosswire.org/mailman/listinfo/sword-devel

Re: [sword-devel] MacSword 1.1.3

Reply via email to