i18npool/source/breakiterator/breakiterator_unicode.cxx | 27 ++++++++++++++-- sw/inc/breakit.hxx | 7 +++- sw/source/core/bastyp/breakit.cxx | 19 +++++++---- sw/source/core/txtnode/txtedt.cxx | 2 - 4 files changed, 44 insertions(+), 11 deletions(-)
New commits: commit ae716b07f7218fadf0143de1946cc9e0e2c08744 Author: Caolán McNamara <caol...@redhat.com> Date: Tue May 1 16:26:58 2012 +0100 Related: fdo#49208 optimize common case Change-Id: Ieec379b08cb9096b1c8187c2eda5053f093c612d diff --git a/sw/source/core/bastyp/breakit.cxx b/sw/source/core/bastyp/breakit.cxx index 1fbadd8..4c84379 100644 --- a/sw/source/core/bastyp/breakit.cxx +++ b/sw/source/core/bastyp/breakit.cxx @@ -172,9 +172,16 @@ sal_Int32 SwBreakIt::getGraphemeCount(const rtl::OUString& rText, sal_Int32 nSta sal_Int32 nCurPos = nStart; while (nCurPos < nEnd) { - sal_Int32 nCount2 = 1; - nCurPos = xBreak->nextCharacters(rText, nCurPos, lang::Locale(), - i18n::CharacterIteratorMode::SKIPCELL, nCount2, nCount2); + //fdo#49208 cheat and assume that nothing can combine with a space + //to form a single grapheme + if (rText[nCurPos] == ' ') + ++nCurPos; + else + { + sal_Int32 nCount2 = 1; + nCurPos = xBreak->nextCharacters(rText, nCurPos, lang::Locale(), + i18n::CharacterIteratorMode::SKIPCELL, nCount2, nCount2); + } ++nGraphemeCount; } commit ccc47b3db3eae25cc11bb709416c0b61747ca89e Author: Caolán McNamara <caol...@redhat.com> Date: Tue May 1 16:09:25 2012 +0100 Resolves: fdo#49208 icu string compare is shocking slow Change-Id: Iee3ab0ebbbb72e88e33dcbe0fcb4df1e4f60c301 diff --git a/i18npool/source/breakiterator/breakiterator_unicode.cxx b/i18npool/source/breakiterator/breakiterator_unicode.cxx index 08a5e6c..4005780 100644 --- a/i18npool/source/breakiterator/breakiterator_unicode.cxx +++ b/i18npool/source/breakiterator/breakiterator_unicode.cxx @@ -34,6 +34,7 @@ #include <unicode/udata.h> #include <rtl/strbuf.hxx> #include <rtl/ustring.hxx> +#include <string.h> U_CDECL_BEGIN extern const char OpenOffice_dat[]; @@ -94,6 +95,24 @@ class OOoRuleBasedBreakIterator : public RuleBasedBreakIterator { }; +namespace +{ + bool isEqual(const UnicodeString &rOne, const rtl::OUString &rOther) + { + sal_Int32 nLength = rOne.length(); + if (nLength != 
rOther.getLength()) + return false; + + //fdo#49208 operator== is implemented by compareTo etc in icu which is + //horrifically slow when all you want to know is that they're the same + //or not + const UChar *pOne = rOne.getBuffer(); + // UChar != sal_Unicode in MinGW + const UChar *pOther = reinterpret_cast<const UChar *>(rOther.getStr()); + return memcmp(pOne, pOther, nLength * sizeof(UChar)) == 0; + } +} + // loading ICU breakiterator on demand. void SAL_CALL BreakIterator_Unicode::loadICUBreakIterator(const com::sun::star::lang::Locale& rLocale, sal_Int16 rBreakType, sal_Int16 rWordType, const sal_Char *rule, const OUString& rText) throw(uno::RuntimeException) @@ -199,10 +218,10 @@ void SAL_CALL BreakIterator_Unicode::loadICUBreakIterator(const com::sun::star:: } } - // UChar != sal_Unicode in MinGW - const UChar *pText = reinterpret_cast<const UChar *>(rText.getStr()); - if (newBreak || icuBI->aICUText.compare(pText, rText.getLength())) + if (newBreak || !isEqual(icuBI->aICUText, rText)) { + // UChar != sal_Unicode in MinGW + const UChar *pText = reinterpret_cast<const UChar *>(rText.getStr()); icuBI->aICUText=UnicodeString(pText, rText.getLength()); icuBI->aBreakIterator->setText(icuBI->aICUText); } commit fd4fe85329654883a0bf3304ad0aa8ef0bfde844 Author: Caolán McNamara <caol...@redhat.com> Date: Tue May 1 16:06:25 2012 +0100 Related: fdo#49208 don't copy string if we can reuse the original Change-Id: I95d82ce168fd1790107316460f6ddbd9f6b32e18 diff --git a/sw/inc/breakit.hxx b/sw/inc/breakit.hxx index 5fa2bd5..031d91c 100644 --- a/sw/inc/breakit.hxx +++ b/sw/inc/breakit.hxx @@ -96,7 +96,12 @@ public: sal_uInt16 GetRealScriptOfText( const rtl::OUString& rTxt, sal_Int32 nPos ) const; sal_uInt16 GetAllScriptsOfText( const rtl::OUString& rTxt ) const; - sal_Int32 getGraphemeCount(const rtl::OUString& rStr) const; + sal_Int32 getGraphemeCount(const rtl::OUString& rStr, + sal_Int32 nStart, sal_Int32 nEnd) const; + sal_Int32 getGraphemeCount(const rtl::OUString& 
rStr) const + { + return getGraphemeCount(rStr, 0, rStr.getLength()); + } }; #define SW_BREAKITER() SwBreakIt::Get() diff --git a/sw/source/core/bastyp/breakit.cxx b/sw/source/core/bastyp/breakit.cxx index 313fdca..1fbadd8 100644 --- a/sw/source/core/bastyp/breakit.cxx +++ b/sw/source/core/bastyp/breakit.cxx @@ -165,12 +165,12 @@ sal_uInt16 SwBreakIt::GetAllScriptsOfText( const rtl::OUString& rTxt ) const return nRet; } -sal_Int32 SwBreakIt::getGraphemeCount(const rtl::OUString& rText) const +sal_Int32 SwBreakIt::getGraphemeCount(const rtl::OUString& rText, sal_Int32 nStart, sal_Int32 nEnd) const { sal_Int32 nGraphemeCount = 0; - sal_Int32 nCurPos = 0; - while (nCurPos < rText.getLength()) + sal_Int32 nCurPos = nStart; + while (nCurPos < nEnd) { sal_Int32 nCount2 = 1; nCurPos = xBreak->nextCharacters(rText, nCurPos, lang::Locale(), diff --git a/sw/source/core/txtnode/txtedt.cxx b/sw/source/core/txtnode/txtedt.cxx index cb5e472..ae8fe55 100644 --- a/sw/source/core/txtnode/txtedt.cxx +++ b/sw/source/core/txtnode/txtedt.cxx @@ -1913,7 +1913,7 @@ void SwTxtNode::CountWords( SwDocStat& rStat, } } - nTmpChars = pBreakIt->getGraphemeCount(aExpandText.copy(nExpandBegin, nExpandEnd - nExpandBegin)); + nTmpChars = pBreakIt->getGraphemeCount(aExpandText, nExpandBegin, nExpandEnd); nTmpChars -= nNumOfMaskedChars; // no nTmpCharsExcludingSpaces adjust needed neither for blanked out MaskedChars commit 268ec2e64f89eb39fd5f02688787cd6f53e948b5 Author: Caolán McNamara <caol...@redhat.com> Date: Tue May 1 15:08:29 2012 +0100 Related: fdo#49208 crazy to create the string *twice* Change-Id: Ib31919672d0754fa4f650dcb32dc2c59a410b54c diff --git a/i18npool/source/breakiterator/breakiterator_unicode.cxx b/i18npool/source/breakiterator/breakiterator_unicode.cxx index 0d6df0f..08a5e6c 100644 --- a/i18npool/source/breakiterator/breakiterator_unicode.cxx +++ b/i18npool/source/breakiterator/breakiterator_unicode.cxx @@ -199,13 +199,15 @@ void SAL_CALL 
BreakIterator_Unicode::loadICUBreakIterator(const com::sun::star:: } } - if (newBreak || icuBI->aICUText.compare(UnicodeString(reinterpret_cast<const UChar *>(rText.getStr()), rText.getLength()))) { // UChar != sal_Unicode in MinGW - icuBI->aICUText=UnicodeString(reinterpret_cast<const UChar *>(rText.getStr()), rText.getLength()); + // UChar != sal_Unicode in MinGW + const UChar *pText = reinterpret_cast<const UChar *>(rText.getStr()); + if (newBreak || icuBI->aICUText.compare(pText, rText.getLength())) + { + icuBI->aICUText=UnicodeString(pText, rText.getLength()); icuBI->aBreakIterator->setText(icuBI->aICUText); } } - sal_Int32 SAL_CALL BreakIterator_Unicode::nextCharacters( const OUString& Text, sal_Int32 nStartPos, const lang::Locale &rLocale, sal_Int16 nCharacterIteratorMode, sal_Int32 nCount, sal_Int32& nDone )
_______________________________________________ Libreoffice-commits mailing list libreoffice-commits@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/libreoffice-commits