lingucomponent/source/hyphenator/hyphen/hyphenimp.cxx | 322 ++++++------------ lingucomponent/source/hyphenator/hyphen/hyphenimp.hxx | 4 2 files changed, 121 insertions(+), 205 deletions(-)
New commits: commit b44339c2156dbef8a9adb4be88981631d0181c29 Author: Mike Kaganski <mike.kagan...@collabora.com> AuthorDate: Sun Nov 24 15:08:46 2024 +0500 Commit: Mike Kaganski <mike.kagan...@collabora.com> CommitDate: Sun Nov 24 16:11:30 2024 +0100 Deduplicate calls to hnj_hyphen_hyphenate3 Change-Id: Iecd3cf707fd692a10382f3a6be02c2dd94f02111 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/177201 Tested-by: Jenkins Reviewed-by: Mike Kaganski <mike.kagan...@collabora.com> diff --git a/lingucomponent/source/hyphenator/hyphen/hyphenimp.cxx b/lingucomponent/source/hyphenator/hyphen/hyphenimp.cxx index a9d6d779bacb..66d22f0b61a9 100644 --- a/lingucomponent/source/hyphenator/hyphen/hyphenimp.cxx +++ b/lingucomponent/source/hyphenator/hyphen/hyphenimp.cxx @@ -253,6 +253,100 @@ bool LoadDictionary(HDInfo& rDict) rDict.eEnc = getTextEncodingFromCharset(dict->cset); return true; } + +OUString makeLowerCase(const OUString& aTerm, CharClass const* pCC) +{ + if (pCC) + return pCC->lowercase(aTerm); + return aTerm; +} + +OUString makeUpperCase(const OUString& aTerm, CharClass const* pCC) +{ + if (pCC) + return pCC->uppercase(aTerm); + return aTerm; +} + +OUString makeInitCap(const OUString& aTerm, CharClass const* pCC) +{ + sal_Int32 tlen = aTerm.getLength(); + if (pCC && tlen) + { + OUString bTemp = aTerm.copy(0, 1); + if (tlen > 1) + return (pCC->uppercase(bTemp, 0, 1) + pCC->lowercase(aTerm, 1, (tlen - 1))); + + return pCC->uppercase(bTemp, 0, 1); + } + return aTerm; +} + +struct hyphenation_result +{ + int n = 0; + bool failed = true; + char** rep = nullptr; // replacements of discretionary hyphenation + int* pos = nullptr; // array of [hyphenation point] minus [deletion position] + int* cut = nullptr; // length of deletions in original word + std::unique_ptr<char[]> hyphens; + + ~hyphenation_result() + { + if (rep) + { + for (int i = 0; i < n; i++) + { + if (rep[i]) + free(rep[i]); + } + free(rep); + } + if (pos) + free(pos); + if (cut) + free(cut); + } +}; + +hyphenation_result getHyphens(std::u16string_view word, const HDInfo& hdInfo, sal_Int16 minLead, + sal_Int16 minTrail) +{ + // first convert any smart quotes or apostrophes to normal ones + OUStringBuffer aBuf(word); + for (sal_Int32 ix = 0; ix < aBuf.getLength(); ix++) + { + sal_Unicode ch = aBuf[ix]; + if ((ch == 0x201C) || (ch == 0x201D)) + aBuf[ix] = u'"'; + if ((ch == 0x2018) || (ch == 0x2019)) + aBuf[ix] = u'\''; + } + + // now convert word to all lowercase for pattern recognition + OUString nTerm(makeLowerCase(OUString::unacquired(aBuf), hdInfo.apCC.get())); + + // now convert word to needed encoding + OString encWord(OU2ENC(nTerm, hdInfo.eEnc)); + + // now strip off any ending periods + auto lastValidPos = std::string_view(encWord).find_last_not_of('.'); + if (lastValidPos == std::string_view::npos) + return {}; + + int n = lastValidPos + 1; + std::unique_ptr<char[]> hyphens(new char[n + 5]); + char** rep = nullptr; // replacements of discretionary hyphenation + int* pos = nullptr; // array of [hyphenation point] minus [deletion position] + int* cut = nullptr; // length of deletions in original word + + HyphenDict* dict = hdInfo.aPtr; + const bool failed = 0 != hnj_hyphen_hyphenate3( dict, encWord.getStr(), n, hyphens.get(), nullptr, + &rep, &pos, &cut, minLead, minTrail, + std::max<sal_Int16>(dict->clhmin, 2) + std::max(0, minLead - std::max<sal_Int16>(dict->lhmin, 2)), + std::max<sal_Int16>(dict->crhmin, 2) + std::max(0, minTrail - std::max<sal_Int16>(dict->rhmin, 2)) ); + return { n, failed, rep, pos, cut, std::move(hyphens) }; // buffers will free in dtor +} } const HDInfo* Hyphenator::getMatchingDict(const css::lang::Locale& aLocale) @@ -304,7 +398,6 @@ Reference< XHyphenatedWord > SAL_CALL Hyphenator::hyphenate( const OUString& aWo int nHyphenationPosAltHyph = -1; // hyphenate the word with that dictionary - HyphenDict* dict = pHDInfo->aPtr; rtl_TextEncoding eEnc = pHDInfo->eEnc; CharClass* pCC = pHDInfo->apCC.get(); @@ -316,68 +409,9 @@ Reference< XHyphenatedWord > SAL_CALL Hyphenator::hyphenate( const OUString& aWo CapType ct = capitalType(aWord, pCC); - // first convert any smart quotes or apostrophes to normal ones - OUStringBuffer rBuf(aWord); - sal_Int32 nc = rBuf.getLength(); - sal_Unicode ch; - for (sal_Int32 ix=0; ix < nc; ix++) - { - ch = rBuf[ix]; - if ((ch == 0x201C) || (ch == 0x201D)) - rBuf[ix] = u'"'; - if ((ch == 0x2018) || (ch == 0x2019)) - rBuf[ix] = u'\''; - } - OUString nWord(rBuf.makeStringAndClear()); - - // now convert word to all lowercase for pattern recognition - OUString nTerm(makeLowerCase(nWord, pCC)); - - // now convert word to needed encoding - OString encWord(OU2ENC(nTerm,eEnc)); - - int wordlen = encWord.getLength(); - std::unique_ptr<char[]> lcword(new char[wordlen + 1]); - std::unique_ptr<char[]> hyphens(new char[wordlen + 5]); - - char ** rep = nullptr; // replacements of discretionary hyphenation - int * pos = nullptr; // array of [hyphenation point] minus [deletion position] - int * cut = nullptr; // length of deletions in original word - - // copy converted word into simple char buffer - strcpy(lcword.get(),encWord.getStr()); - - // now strip off any ending periods - int n = wordlen-1; - while((n >=0) && (lcword[n] == '.')) - n--; - n++; - if (n > 0) - { - const bool bFailed = 0 != hnj_hyphen_hyphenate3( dict, lcword.get(), n, hyphens.get(), nullptr, - &rep, &pos, &cut, minLead, minTrail, - std::max<sal_Int16>(dict->clhmin, std::max<sal_Int16>(dict->clhmin, 2) + std::max(0, minLead - std::max<sal_Int16>(dict->lhmin, 2))), - std::max<sal_Int16>(dict->crhmin, std::max<sal_Int16>(dict->crhmin, 2) + std::max(0, minTrail - std::max<sal_Int16>(dict->rhmin, 2))) ); - if (bFailed) - { - // whoops something did not work - if (rep) - { - for(int j = 0; j < n; j++) - { - if (rep[j]) free(rep[j]); - } - free(rep); - } - if (pos) free(pos); - if (cut) free(cut); - return nullptr; - } - } - - // now backfill hyphens[] for any removed trailing periods - for (int c = n; c < wordlen; c++) hyphens[c] = '0'; - hyphens[wordlen] = ' + auto result = getHyphens(aWord, *pHDInfo, minLead, minTrail); + if (result.failed) + return nullptr; sal_Int32 Leading = GetPosInWordToCheck( aWord, nMaxLeading ); @@ -391,20 +425,20 @@ Reference< XHyphenatedWord > SAL_CALL Hyphenator::hyphenate( const OUString& aWo OUString sStems; // processed result of the compound word analysis, e.g. com|pound|word sal_Int32 nSuffixLen = 0; // do not remove break points in suffixes - for (sal_Int32 i = 0; i < n; i++) + for (sal_Int32 i = 0; i < result.n; i++) { int leftrep = 0; - bool hit = (n >= minLen); - if (!rep || !rep[i]) + bool hit = (result.n >= minLen); + if (!result.rep || !result.rep[i]) { - hit = hit && (hyphens[i]&1) && (i < Leading); + hit = hit && (result.hyphens[i] & 1) && (i < Leading); hit = hit && (i >= (minLead-1) ); - hit = hit && ((n - i - 1) >= minTrail); + hit = hit && ((result.n - i - 1) >= minTrail); } else { // calculate change character length before hyphenation point signed with '=' - for (char * c = rep[i]; *c && (*c != '='); c++) + for (char * c = result.rep[i]; *c && (*c != '='); c++) { if (eEnc == RTL_TEXTENCODING_UTF8) { @@ -414,9 +448,9 @@ Reference< XHyphenatedWord > SAL_CALL Hyphenator::hyphenate( const OUString& aWo else leftrep++; } - hit = hit && (hyphens[i]&1) && ((i + leftrep - pos[i]) < Leading); - hit = hit && ((i + leftrep - pos[i]) >= (minLead-1) ); - hit = hit && ((n - i - 1 + sal::static_int_cast< sal_sSize >(strlen(rep[i])) - leftrep - 1) >= minTrail); + hit = hit && (result.hyphens[i] & 1) && ((i + leftrep - result.pos[i]) < Leading); + hit = hit && ((i + leftrep - result.pos[i]) >= (minLead-1) ); + hit = hit && ((result.n - i - 1 + sal::static_int_cast< sal_sSize >(strlen(result.rep[i])) - leftrep - 1) >= minTrail); } if (hit) { @@ -577,10 +611,10 @@ Reference< XHyphenatedWord > SAL_CALL Hyphenator::hyphenate( const OUString& aWo } nHyphenationPos = i; - if (rep && rep[i]) + if (result.rep && result.rep[i]) { - nHyphenationPosAlt = i - pos[i]; - nHyphenationPosAltHyph = i + leftrep - pos[i]; + nHyphenationPosAlt = i - result.pos[i]; + nHyphenationPosAltHyph = i + leftrep - result.pos[i]; } } } @@ -588,17 +622,17 @@ Reference< XHyphenatedWord > SAL_CALL Hyphenator::hyphenate( const OUString& aWo Reference<XHyphenatedWord> xRes; if (nHyphenationPos != -1) { - if (rep && rep[nHyphenationPos]) + if (result.rep && result.rep[nHyphenationPos]) { // remove equal sign - char * s = rep[nHyphenationPos]; + char * s = result.rep[nHyphenationPos]; int eq = 0; for (; *s; s++) { if (*s == '=') eq = 1; if (eq) *s = *(s + 1); } - OUString repHyphlow(rep[nHyphenationPos], strlen(rep[nHyphenationPos]), eEnc); + OUString repHyphlow(result.rep[nHyphenationPos], strlen(result.rep[nHyphenationPos]), eEnc); OUString repHyph; switch (ct) { @@ -627,7 +661,7 @@ Reference< XHyphenatedWord > SAL_CALL Hyphenator::hyphenate( const OUString& aWo nHyphenationPosAltHyph : nHyphenationPos); // discretionary hyphenation xRes = HyphenatedWord::CreateHyphenatedWord( aWord, LinguLocaleToLanguage( aLocale ), nPos, - aWord.replaceAt(nHyphenationPosAlt + 1, cut[nHyphenationPos], repHyph), + aWord.replaceAt(nHyphenationPosAlt + 1, result.cut[nHyphenationPos], repHyph), static_cast<sal_Int16>(nHyphenationPosAltHyph)); } else @@ -636,17 +670,6 @@ Reference< XHyphenatedWord > SAL_CALL Hyphenator::hyphenate( const OUString& aWo static_cast<sal_Int16>(nHyphenationPos), aWord, static_cast<sal_Int16>(nHyphenationPos)); } } - - if (rep) - { - for(int j = 0; j < n; j++) - { - if (rep[j]) free(rep[j]); - } - free(rep); - } - if (pos) free(pos); - if (cut) free(cut); return xRes; } return nullptr; @@ -690,78 +713,16 @@ Reference< XPossibleHyphens > SAL_CALL Hyphenator::createPossibleHyphens( const if (auto pHDInfo = getMatchingDict(aLocale)) { // hyphenate the word with that dictionary - HyphenDict* dict = pHDInfo->aPtr; - rtl_TextEncoding eEnc = pHDInfo->eEnc; - CharClass* pCC = pHDInfo->apCC.get(); - - // first handle smart quotes both single and double - OUStringBuffer rBuf(aWord); - sal_Int32 nc = rBuf.getLength(); - sal_Unicode ch; - for (sal_Int32 ix=0; ix < nc; ix++) - { - ch = rBuf[ix]; - if ((ch == 0x201C) || (ch == 0x201D)) - rBuf[ix] = u'"'; - if ((ch == 0x2018) || (ch == 0x2019)) - rBuf[ix] = u'\''; - } - OUString nWord(rBuf.makeStringAndClear()); - - // now convert word to all lowercase for pattern recognition - OUString nTerm(makeLowerCase(nWord, pCC)); - - // now convert word to needed encoding - OString encWord(OU2ENC(nTerm,eEnc)); - - sal_Int32 wordlen = encWord.getLength(); - std::unique_ptr<char[]> lcword(new char[wordlen+1]); - std::unique_ptr<char[]> hyphens(new char[wordlen+5]); - char ** rep = nullptr; // replacements of discretionary hyphenation - int * pos = nullptr; // array of [hyphenation point] minus [deletion position] - int * cut = nullptr; // length of deletions in original word - - // copy converted word into simple char buffer - strcpy(lcword.get(),encWord.getStr()); - - // first remove any trailing periods - sal_Int32 n = wordlen-1; - while((n >=0) && (lcword[n] == '.')) - n--; - n++; - if (n > 0) - { - const bool bFailed = 0 != hnj_hyphen_hyphenate3(dict, lcword.get(), n, hyphens.get(), nullptr, - &rep, &pos, &cut, minLead, minTrail, - std::max<sal_Int16>(dict->clhmin, std::max<sal_Int16>(dict->clhmin, 2) + std::max(0, minLead - std::max<sal_Int16>(dict->lhmin, 2))), - std::max<sal_Int16>(dict->crhmin, std::max<sal_Int16>(dict->crhmin, 2) + std::max(0, minTrail - std::max<sal_Int16>(dict->rhmin, 2))) ); - if (bFailed) - { - if (rep) - { - for(int j = 0; j < n; j++) - { - if (rep[j]) free(rep[j]); - } - free(rep); - } - if (pos) free(pos); - if (cut) free(cut); - - return nullptr; - } - } - // now backfill hyphens[] for any removed periods - for (sal_Int32 c = n; c < wordlen; c++) - hyphens[c] = '0'; - hyphens[wordlen] = ' + auto result = getHyphens(aWord, *pHDInfo, minLead, minTrail); + if (result.failed) + return nullptr; sal_Int32 nHyphCount = 0; // FIXME: shouldn't we iterate code points instead? - for (sal_Int32 i = 0; i < nWord.getLength(); i++) + for (sal_Int32 i = 0; i < aWord.getLength(); i++) { - if (hyphens[i]&1) + if (result.hyphens[i] & 1) nHyphCount++; } @@ -770,11 +731,11 @@ Reference< XPossibleHyphens > SAL_CALL Hyphenator::createPossibleHyphens( const OUStringBuffer hyphenatedWordBuffer; nHyphCount = 0; - for (sal_Int32 i = 0; i < nWord.getLength(); i++) + for (sal_Int32 i = 0; i < aWord.getLength(); i++) { hyphenatedWordBuffer.append(aWord[i]); // hyphenation position - if (hyphens[i]&1) + if (result.hyphens[i] & 1) { // linguistic::PossibleHyphens is stuck with // css::uno::Sequence<sal_Int16> because of @@ -797,54 +758,13 @@ Reference< XPossibleHyphens > SAL_CALL Hyphenator::createPossibleHyphens( const OUString hyphenatedWord = hyphenatedWordBuffer.makeStringAndClear(); - Reference< XPossibleHyphens > xRes = PossibleHyphens::CreatePossibleHyphens( + return PossibleHyphens::CreatePossibleHyphens( aWord, LinguLocaleToLanguage( aLocale ), hyphenatedWord, aHyphPos); - - if (rep) - { - for(int j = 0; j < n; j++) - { - if (rep[j]) free(rep[j]); - } - free(rep); - } - if (pos) free(pos); - if (cut) free(cut); - - return xRes; } return nullptr; } -OUString Hyphenator::makeLowerCase(const OUString& aTerm, CharClass const * pCC) -{ - if (pCC) - return pCC->lowercase(aTerm); - return aTerm; -} - -OUString Hyphenator::makeUpperCase(const OUString& aTerm, CharClass const * pCC) -{ - if (pCC) - return pCC->uppercase(aTerm); - return aTerm; -} - -OUString Hyphenator::makeInitCap(const OUString& aTerm, CharClass const * pCC) -{ - sal_Int32 tlen = aTerm.getLength(); - if (pCC && tlen) - { - OUString bTemp = aTerm.copy(0,1); - if (tlen > 1) - return ( pCC->uppercase(bTemp, 0, 1) + pCC->lowercase(aTerm,1,(tlen-1)) ); - - return pCC->uppercase(bTemp, 0, 1); - } - return aTerm; -} - sal_Bool SAL_CALL Hyphenator::addLinguServiceEventListener( const Reference< XLinguServiceEventListener >& rxLstnr ) { diff --git a/lingucomponent/source/hyphenator/hyphen/hyphenimp.hxx b/lingucomponent/source/hyphenator/hyphen/hyphenimp.hxx index c66650891e6b..f36c24afd2be 100644 --- a/lingucomponent/source/hyphenator/hyphen/hyphenimp.hxx +++ b/lingucomponent/source/hyphenator/hyphen/hyphenimp.hxx @@ -116,10 +116,6 @@ public: virtual Sequence< OUString > SAL_CALL getSupportedServiceNames() override; private: - static OUString makeLowerCase(const OUString&, CharClass const *); - static OUString makeUpperCase(const OUString&, CharClass const *); - static OUString makeInitCap(const OUString&, CharClass const *); - void ensureLocales(); const HDInfo* getMatchingDict(const css::lang::Locale& aLocale); };