editeng/qa/unit/core-test.cxx | 33 +++- editeng/source/editeng/edtspell.cxx | 62 +++++--- editeng/source/misc/svxacorr.cxx | 231 +++++++++++++++++++++++++------ include/editeng/svxacorr.hxx | 72 ++++++++- sw/qa/extras/uiwriter/data/tdf158454.odt |binary sw/qa/extras/uiwriter/uiwriter6.cxx | 39 +++++ sw/source/core/edit/acorrect.cxx | 176 ++++++++++++----------- 7 files changed, 451 insertions(+), 162 deletions(-)
New commits: commit 76c96ca7c9a6e0d847ec5dc186c6e47ab6061f5f Author: Theppitak Karoonboonyanan <theppi...@gmail.com> AuthorDate: Thu Nov 30 22:34:29 2023 +0700 Commit: Jonathan Clark <jonat...@libreoffice.org> CommitDate: Fri Jun 21 20:34:22 2024 +0200 tdf#158454 Add Thai Autocorrect Support, coding part SvxAutoCorrDoc::ChgAutoCorrWord() implementations: correct multiple patterns * include/editeng/svxacorr.hxx, editeng/source/misc/svxacorr.cxx: - Add classes SvxAutocorrWordList::{Iterator,WordSearchStatus}. - Make SvxAutocorrWordList::SearchWordsInList() return WordSearchStatus so the search can be continued with the added SvxAutocorrWordList::SearchWordsNext() method. - Make SvxAutoCorrect::SearchWordsInList(), and its lcl_SearchWordsInList() companion, return WordSearchStatus propagated from SvxAutocorrWordList::SearchWordsInList(). - SvxAutocorrWordList::WordMatches(): The existing mechanism for preventing pattern collisions like those in tdf#83037 (→ and ← and ↔ autocorrect collisions) worked by storing the matched string of a wildcard pattern back into the list without overwriting an existing one. If the matched string was found in the list, it would just be treated as no match. While this worked well for collision prevention, it caused failures with the new exhaustive wildcard pattern visiting method when autocorrecting a second text chunk with the same content. In such a situation, all intermediate stages of corrections of the first text chunk would be recorded into the list. And, in the second chunk, the first stage would just be applied from the recorded pattern, but all subsequent stages would be refused due to the "collision" with the recorded patterns. Moreover, the new method would cause the list to grow more quickly as the autocorrections are done. To solve the problem, just "peek" for the collision instead of actually storing it. And SvxAutocorrWordList::ContainsPattern() is added for this purpose. 
* editeng/qa/unit/core-test.cxx: - Modify TestAutoCorrDoc::ChgAutoCorrWord() to iterate through all patterns, instead of finishing at the first one. * editeng/source/editeng/edtspell.cxx: - Ditto for EdtAutoCorrDoc::ChgAutoCorrWord(). * sw/source/core/edit/acorrect.cxx: - Ditto for SwAutoCorrDoc::ChgAutoCorrWord(). - SwAutoCorrDoc::ChgAutoCorrWord(): Remove old dead code for autocorrection on text with redlines. * sw/qa/extras/uiwriter/uiwriter6.cxx, +sw/qa/extras/uiwriter/data/tdf158454.odt: - Add unit test "testTdf158454". Change-Id: I8fb4a628a977b79b0ed2f239fd3749f15823b5df Reviewed-on: https://gerrit.libreoffice.org/c/core/+/160160 Tested-by: Jenkins Reviewed-by: Jonathan Clark <jonat...@libreoffice.org> diff --git a/editeng/qa/unit/core-test.cxx b/editeng/qa/unit/core-test.cxx index a57a533c42c4..f4032b09e120 100644 --- a/editeng/qa/unit/core-test.cxx +++ b/editeng/qa/unit/core-test.cxx @@ -440,18 +440,35 @@ private: { //fprintf(stderr, "TestAutoCorrDoc::ChgAutoCorrWord "); - if (m_rText.isEmpty()) + if (m_rText.isEmpty()) { return false; + } LanguageTag aLanguageTag(m_eLang); - const SvxAutocorrWord* pFnd - = rACorrect.SearchWordsInList(m_rText, rSttPos, nEndPos, *this, aLanguageTag); - if (pFnd && pFnd->IsTextOnly()) + sal_Int32 sttPos = rSttPos; + auto pStatus = rACorrect.SearchWordsInList(m_rText, sttPos, nEndPos, + *this, aLanguageTag); + if (pStatus) { - m_rText = m_rText.replaceAt(rSttPos, nEndPos, pFnd->GetLong()); - if (pPara) - pPara->clear(); // =&pCurNode->GetString(); - return true; + sal_Int32 minSttPos = sttPos; + do { + const SvxAutocorrWord* pFnd = pStatus->GetAutocorrWord(); + if (pFnd && pFnd->IsTextOnly()) + { + m_rText = m_rText.replaceAt(sttPos, nEndPos, pFnd->GetLong()); + nEndPos = sttPos + pFnd->GetLong().getLength(); + if( pPara ) { + pPara->clear(); // =&pCurNode->GetString(); + } + return true; + } + if (sttPos < minSttPos) { + minSttPos = sttPos; + } + sttPos = rSttPos; + } while (SvxAutoCorrect::SearchWordsNext(m_rText, sttPos, 
nEndPos, + *pStatus)); + rSttPos = minSttPos; } return false; diff --git a/editeng/source/editeng/edtspell.cxx b/editeng/source/editeng/edtspell.cxx index b989755d8be4..77ff6fab9019 100644 --- a/editeng/source/editeng/edtspell.cxx +++ b/editeng/source/editeng/edtspell.cxx @@ -647,32 +647,52 @@ bool EdtAutoCorrDoc::ChgAutoCorrWord( sal_Int32& rSttPos, OUString aShort( pCurNode->Copy( rSttPos, nEndPos - rSttPos ) ); bool bRet = false; - if( aShort.isEmpty() ) + if( aShort.isEmpty() ) { return bRet; + } LanguageTag aLanguageTag( mpEditEngine->GetLanguage( EditPaM( pCurNode, rSttPos+1 ) ).nLang ); - const SvxAutocorrWord* pFnd = rACorrect.SearchWordsInList( - pCurNode->GetString(), rSttPos, nEndPos, *this, aLanguageTag); - if( pFnd && pFnd->IsTextOnly() ) - { - - // replace also last colon of keywords surrounded by colons (for example, ":name:") - bool replaceLastChar = pFnd->GetShort()[0] == ':' && pFnd->GetShort().endsWith(":"); - - // then replace - EditSelection aSel( EditPaM( pCurNode, rSttPos ), - EditPaM( pCurNode, nEndPos + (replaceLastChar ? 
1 : 0) )); - aSel = mpEditEngine->DeleteSelection(aSel); - SAL_WARN_IF(nCursor < nEndPos, "editeng", - "Cursor in the heart of the action?!"); - nCursor -= ( nEndPos-rSttPos ); - mpEditEngine->InsertText(aSel, pFnd->GetLong()); - nCursor = nCursor + pFnd->GetLong().getLength(); - if( pPara ) - *pPara = pCurNode->GetString(); - bRet = true; + sal_Int32 sttPos = rSttPos; + auto pStatus = rACorrect.SearchWordsInList(pCurNode->GetString(), + sttPos, nEndPos, + *this, aLanguageTag); + if( !pStatus ) { + return bRet; } + sal_Int32 minSttPos = sttPos; + do { + const SvxAutocorrWord* pFnd = pStatus->GetAutocorrWord(); + if( pFnd && pFnd->IsTextOnly() ) + { + // replace also last colon of keywords surrounded by colons + // (for example, ":name:") + bool replaceLastChar = pFnd->GetShort()[0] == ':' + && pFnd->GetShort().endsWith(":"); + + // then replace + EditSelection aSel( EditPaM( pCurNode, sttPos ), + EditPaM( pCurNode, nEndPos + (replaceLastChar ? 1 : 0) )); + aSel = mpEditEngine->DeleteSelection(aSel); + SAL_WARN_IF(nCursor < nEndPos, "editeng", + "Cursor in the heart of the action?!"); + nCursor -= ( nEndPos-sttPos ); + mpEditEngine->InsertText(aSel, pFnd->GetLong()); + nCursor = nCursor + pFnd->GetLong().getLength(); + nEndPos = sttPos + pFnd->GetLong().getLength(); + if( pPara ) { + *pPara = pCurNode->GetString(); + } + bRet = true; + if( sttPos < minSttPos ) { + minSttPos = sttPos; + } + } + sttPos = rSttPos; + } while( SvxAutoCorrect::SearchWordsNext(pCurNode->GetString(), + sttPos, nEndPos, *pStatus) ); + rSttPos = minSttPos; + return bRet; } diff --git a/editeng/source/misc/svxacorr.cxx b/editeng/source/misc/svxacorr.cxx index 40d8804f0f4f..6730a2c82d16 100644 --- a/editeng/source/misc/svxacorr.cxx +++ b/editeng/source/misc/svxacorr.cxx @@ -2024,20 +2024,21 @@ static void GeneratePackageName ( std::u16string_view rShort, OUString& rPackage rPackageName = aBuf.makeStringAndClear(); } -static const SvxAutocorrWord* lcl_SearchWordsInList( - 
SvxAutoCorrectLanguageLists* pList, std::u16string_view rTxt, - sal_Int32& rStt, sal_Int32 nEndPos) +static std::optional<SvxAutocorrWordList::WordSearchStatus> +lcl_SearchWordsInList( SvxAutoCorrectLanguageLists* pList, + std::u16string_view rTxt, + sal_Int32& rStt, sal_Int32 nEndPos ) { const SvxAutocorrWordList* pAutoCorrWordList = pList->GetAutocorrWordList(); return pAutoCorrWordList->SearchWordsInList( rTxt, rStt, nEndPos ); } // the search for the words in the substitution table -const SvxAutocorrWord* SvxAutoCorrect::SearchWordsInList( - std::u16string_view rTxt, sal_Int32& rStt, sal_Int32 nEndPos, - SvxAutoCorrDoc&, LanguageTag& rLang ) +std::optional<SvxAutocorrWordList::WordSearchStatus> +SvxAutoCorrect::SearchWordsInList( + std::u16string_view rTxt, sal_Int32& rStt, sal_Int32 nEndPos, + SvxAutoCorrDoc&, LanguageTag& rLang ) { - const SvxAutocorrWord* pRet = nullptr; LanguageTag aLanguageTag( rLang); if( aLanguageTag.isSystemLocale() ) aLanguageTag.reset( MsLangId::getConfiguredSystemLanguage()); @@ -2053,14 +2054,13 @@ const SvxAutocorrWord* SvxAutoCorrect::SearchWordsInList( const auto iter = m_aLangTable.find(aLanguageTag); assert(iter != m_aLangTable.end()); SvxAutoCorrectLanguageLists & rList = iter->second; - pRet = lcl_SearchWordsInList( &rList, rTxt, rStt, nEndPos ); + auto pRet = lcl_SearchWordsInList( &rList, rTxt, rStt, nEndPos ); if( pRet ) { rLang = aLanguageTag; return pRet; } - else - return nullptr; + return std::nullopt; } // If it still could not be found here, then keep on searching @@ -2074,7 +2074,7 @@ const SvxAutocorrWord* SvxAutoCorrect::SearchWordsInList( { //the language is available - so bring it on SvxAutoCorrectLanguageLists& rList = m_aLangTable.find(aLanguageTag)->second; - pRet = lcl_SearchWordsInList( &rList, rTxt, rStt, nEndPos ); + auto pRet = lcl_SearchWordsInList( &rList, rTxt, rStt, nEndPos ); if( pRet ) { rLang = aLanguageTag; @@ -2089,14 +2089,22 @@ const SvxAutocorrWord* SvxAutoCorrect::SearchWordsInList( const 
auto iter = m_aLangTable.find(aLanguageTag); assert(iter != m_aLangTable.end()); SvxAutoCorrectLanguageLists& rList = iter->second; - pRet = lcl_SearchWordsInList( &rList, rTxt, rStt, nEndPos ); + auto pRet = lcl_SearchWordsInList( &rList, rTxt, rStt, nEndPos ); if( pRet ) { rLang = aLanguageTag; return pRet; } } - return nullptr; + return std::nullopt; +} + +bool SvxAutoCorrect::SearchWordsNext( + std::u16string_view rTxt, sal_Int32& rStt, sal_Int32 nEndPos, + SvxAutocorrWordList::WordSearchStatus& rStatus ) +{ + const SvxAutocorrWordList* pWordList = rStatus.GetAutocorrWordList(); + return pWordList->SearchWordsNext( rTxt, rStt, nEndPos, rStatus ); } bool SvxAutoCorrect::FindInWordStartExceptList( LanguageType eLang, @@ -2971,6 +2979,92 @@ void SvxAutocorrWordList::DeleteAndDestroyAll() mpImpl->DeleteAndDestroyAll(); } +struct SvxAutocorrWordList::Iterator::Impl { + typedef SvxAutocorrWordList::AutocorrWordSetType::const_iterator VecIterType; + typedef AutocorrWordHashType::const_iterator HashIterType; + + HashIterType mHashIter, mHashEnd; + VecIterType mSortedVectorIter, mSortedVectorEnd; + + Impl(const HashIterType& hashIter, const HashIterType& hashEnd, + const VecIterType& vecIter, const VecIterType& vecEnd) + : mHashIter(hashIter), mHashEnd(hashEnd), + mSortedVectorIter(vecIter), mSortedVectorEnd(vecEnd) {} + + bool Step() { + // Iterate hash table, followed by sorted vector + if (mHashIter != mHashEnd) { + return ++mHashIter != mHashEnd + || mSortedVectorIter != mSortedVectorEnd; + } + return ++mSortedVectorIter != mSortedVectorEnd; + } + + const SvxAutocorrWord& operator*() { + return (mHashIter == mHashEnd) ? *mSortedVectorIter : mHashIter->second; + } + const SvxAutocorrWord* operator->() { + return (mHashIter == mHashEnd) ? 
&*mSortedVectorIter : &mHashIter->second; + } +}; + +SvxAutocorrWordList::Iterator::Iterator( + std::unique_ptr<SvxAutocorrWordList::Iterator::Impl> pImpl +) : mpImpl(std::move(pImpl)) +{ +} + +SvxAutocorrWordList::Iterator::Iterator( + const SvxAutocorrWordList::Iterator& it +) : mpImpl(new Impl(*(it.mpImpl))) +{ +} + +SvxAutocorrWordList::Iterator::~Iterator() +{ +} + +bool SvxAutocorrWordList::Iterator::Step() +{ + return mpImpl->Step(); +} + +const SvxAutocorrWord& SvxAutocorrWordList::Iterator::operator*() const +{ + return **mpImpl; +} + +const SvxAutocorrWord* SvxAutocorrWordList::Iterator::operator->() const +{ + return mpImpl->operator->(); +} + +bool SvxAutocorrWordList::ContainsPattern(const OUString& aShort) const +{ + // check hash table first + if (mpImpl->maHash.contains(aShort)) { + return true; + } + + // then do binary search on sorted vector + CollatorWrapper& rCmp = ::GetCollatorWrapper(); + auto it = std::lower_bound(mpImpl->maSortedVector.begin(), + mpImpl->maSortedVector.end(), + aShort, + [&](const SvxAutocorrWord& elm, + const OUString& val) { + return rCmp.compareString(elm.GetShort(), + val) < 0; + } ); + if (it != mpImpl->maSortedVector.end() + && rCmp.compareString(aShort, it->GetShort()) == 0) + { + return true; + } + + return false; +} + // returns true if inserted const SvxAutocorrWord* SvxAutocorrWordList::Insert(SvxAutocorrWord aWord) const { @@ -3058,10 +3152,11 @@ const SvxAutocorrWordList::AutocorrWordSetType& SvxAutocorrWordList::getSortedCo return mpImpl->maSortedVector; } -const SvxAutocorrWord* SvxAutocorrWordList::WordMatches(const SvxAutocorrWord *pFnd, - std::u16string_view rTxt, - sal_Int32 &rStt, - sal_Int32 nEndPos) const +std::optional<SvxAutocorrWord> +SvxAutocorrWordList::WordMatches(const SvxAutocorrWord *pFnd, + std::u16string_view rTxt, + sal_Int32 &rStt, + sal_Int32 nEndPos) const { const OUString& rChk = pFnd->GetShort(); @@ -3074,7 +3169,9 @@ const SvxAutocorrWord* SvxAutocorrWordList::WordMatches(const 
SvxAutocorrWord *p bool bColonNameColon = static_cast<sal_Int32>(rTxt.size()) > nEndPos && rTxt[nEndPos] == ':' && rChk[0] == ':' && rChk.endsWith(":"); if ( nEndPos + (bColonNameColon ? 1 : 0) < rChk.getLength() - left_wildcard - right_wildcard ) - return nullptr; + { + return std::nullopt; + } bool bWasWordDelim = false; sal_Int32 nCalcStt = nEndPos - rChk.getLength() + left_wildcard; @@ -3093,18 +3190,26 @@ const SvxAutocorrWord* SvxAutocorrWordList::WordMatches(const SvxAutocorrWord *p { // fdo#33899 avoid "1/2", "1/3".. to be replaced by fractions in dates, eg. 1/2/14 if (static_cast<sal_Int32>(rTxt.size()) > nEndPos && rTxt[nEndPos] == '/' && rChk.indexOf('/') != -1) - return nullptr; - return pFnd; + { + return std::nullopt; + } + return *pFnd; } // get the first word delimiter position before the matching ".*word" pattern while( rStt && !(bWasWordDelim = IsWordDelim( rTxt[ --rStt ]))) ; if (bWasWordDelim) rStt++; + + // don't let wildcard pattern override non-wildcard one + OUString aShort(rTxt.substr(rStt, nEndPos - rStt)); + if (ContainsPattern(aShort)) { + return std::nullopt; + } + OUString left_pattern( rTxt.substr(rStt, nEndPos - rStt - rChk.getLength() + left_wildcard) ); // avoid double spaces before simple "word" replacement left_pattern += (left_pattern.getLength() == 0 && pFnd->GetLong()[0] == 0x20) ? pFnd->GetLong().subView(1) : pFnd->GetLong(); - if( const SvxAutocorrWord* pNew = Insert( SvxAutocorrWord(OUString(rTxt.substr(rStt, nEndPos - rStt)), left_pattern) ) ) - return pNew; + return SvxAutocorrWord(aShort, left_pattern); } } else // match "word.*" or ".*word.*" patterns, eg. 
"i18n.*", ".*---.*", TODO: add transliteration support @@ -3118,7 +3223,7 @@ const SvxAutocorrWord* SvxAutocorrWordList::WordMatches(const SvxAutocorrWord *p while( nSttWdPos && !(bWasWordDelim = IsWordDelim( rTxt[ --nSttWdPos ]))) ; // search the first occurrence (with a left word delimitation, if needed) - size_t nFndPos = std::u16string_view::npos; + size_t nFndPos = rStt - 1; do { nFndPos = rTxt.find( sTmp, nFndPos + 1); if (nFndPos == std::u16string_view::npos) @@ -3139,10 +3244,14 @@ const SvxAutocorrWord* SvxAutocorrWordList::WordMatches(const SvxAutocorrWord *p } if (nEndPos + extra_repl <= static_cast<sal_Int32>(nFndPos)) { - return nullptr; + return std::nullopt; } - // store matching pattern and its replacement as a new list item, eg. "i18ns" -> "internationalizations" + // return matching pattern and its replacement as a new list item, eg. "i18ns" -> "internationalizations" OUString aShort( rTxt.substr(nFndPos, nEndPos - nFndPos + extra_repl) ); + // don't let wildcard pattern override non-wildcard one + if (ContainsPattern(aShort)) { + return std::nullopt; + } OUString aLong; rStt = nFndPos; @@ -3158,42 +3267,84 @@ const SvxAutocorrWord* SvxAutocorrWordList::WordMatches(const SvxAutocorrWord *p { sal_Int32 nTmp(nFndPos); while (nTmp < static_cast<sal_Int32>(nSttWdPos) && !IsWordDelim(rTxt[nTmp])) + { nTmp++; - if (nTmp < static_cast<sal_Int32>(nSttWdPos)) + } + if (nTmp < static_cast<sal_Int32>(nSttWdPos)) { break; // word delimiter found + } buf.append(rTxt.substr(nFndPos, nSttWdPos - nFndPos)).append(pFnd->GetLong()); nFndPos = nSttWdPos + sTmp.getLength(); } } while (nSttWdPos != std::u16string_view::npos); - if (static_cast<sal_Int32>(nEndPos - nFndPos) > extra_repl) + if (static_cast<sal_Int32>(nEndPos - nFndPos) > extra_repl) { buf.append(rTxt.substr(nFndPos, nEndPos - nFndPos)); + } aLong = buf.makeStringAndClear(); } - if ( const SvxAutocorrWord* pNew = Insert( SvxAutocorrWord(aShort, aLong) ) ) + if ( (static_cast<sal_Int32>(rTxt.size()) > 
nEndPos && IsWordDelim(rTxt[nEndPos])) || static_cast<sal_Int32>(rTxt.size()) == nEndPos ) { - if ( (static_cast<sal_Int32>(rTxt.size()) > nEndPos && IsWordDelim(rTxt[nEndPos])) || static_cast<sal_Int32>(rTxt.size()) == nEndPos ) - return pNew; + return SvxAutocorrWord(aShort, aLong); } } } - return nullptr; + return std::nullopt; } -const SvxAutocorrWord* SvxAutocorrWordList::SearchWordsInList(std::u16string_view rTxt, sal_Int32& rStt, - sal_Int32 nEndPos) const +std::optional<SvxAutocorrWordList::WordSearchStatus> +SvxAutocorrWordList::SearchWordsInList(std::u16string_view rTxt, + sal_Int32& rStt, + sal_Int32 nEndPos) const { - for (auto const& elem : mpImpl->maHash) + for (auto it = mpImpl->maHash.begin(); it != mpImpl->maHash.end(); ++it) { - if( const SvxAutocorrWord *pTmp = WordMatches( &elem.second, rTxt, rStt, nEndPos ) ) - return pTmp; + if(auto pTmp = WordMatches(&it->second, rTxt, rStt, nEndPos)) + { + return WordSearchStatus( + *pTmp, this, + Iterator(std::make_unique<Iterator::Impl>( + it, mpImpl->maHash.end(), + mpImpl->maSortedVector.begin(), mpImpl->maSortedVector.end() + )) + ); + } } - for (auto const& elem : mpImpl->maSortedVector) + for (auto it = mpImpl->maSortedVector.begin(); + it != mpImpl->maSortedVector.end(); ++it) { - if( const SvxAutocorrWord *pTmp = WordMatches( &elem, rTxt, rStt, nEndPos ) ) - return pTmp; + if(auto pTmp = WordMatches(&*it, rTxt, rStt, nEndPos)) + { + return WordSearchStatus( + *pTmp, this, + Iterator(std::make_unique<Iterator::Impl>( + mpImpl->maHash.end(), mpImpl->maHash.end(), + it, mpImpl->maSortedVector.end() + )) + ); + } } - return nullptr; + + return std::nullopt; +} + +bool +SvxAutocorrWordList::SearchWordsNext(std::u16string_view rTxt, + sal_Int32& rStt, + sal_Int32 nEndPos, + SvxAutocorrWordList::WordSearchStatus& rStatus) const +{ + while(rStatus.StepIter()) + { + if(auto pTmp = WordMatches(rStatus.GetWordAtIter(), + rTxt, rStt, nEndPos)) + { + rStatus.mFnd = *pTmp; + return true; + } + } + + return false; 
} /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/include/editeng/svxacorr.hxx b/include/editeng/svxacorr.hxx index d85a3d242783..d30a9795a743 100644 --- a/include/editeng/svxacorr.hxx +++ b/include/editeng/svxacorr.hxx @@ -163,15 +163,62 @@ class EDITENG_DLLPUBLIC SvxAutocorrWordList SvxAutocorrWordList( const SvxAutocorrWordList& ) = delete; const SvxAutocorrWordList& operator= ( const SvxAutocorrWordList& ) = delete; - const SvxAutocorrWord* WordMatches(const SvxAutocorrWord *pFnd, - std::u16string_view rTxt, - sal_Int32 &rStt, - sal_Int32 nEndPos) const; + std::optional<SvxAutocorrWord> WordMatches(const SvxAutocorrWord *pFnd, + std::u16string_view rTxt, + sal_Int32 &rStt, + sal_Int32 nEndPos) const; public: + class EDITENG_DLLPUBLIC Iterator { + struct Impl; + std::unique_ptr<Impl> mpImpl; + + // For construction from containers in *SvxAutocorrWordList::mpImpl + friend class SvxAutocorrWordList; + Iterator(std::unique_ptr<Impl> pImpl); + + public: + Iterator(const Iterator& it); + ~Iterator(); + bool Step(); + const SvxAutocorrWord& operator*() const; + const SvxAutocorrWord* operator->() const; + }; + + class EDITENG_DLLPUBLIC WordSearchStatus { + SvxAutocorrWord mFnd; + + // For iteration + friend class SvxAutocorrWordList; + const SvxAutocorrWordList* mpAutocorrWordList; + SvxAutocorrWordList::Iterator mAutocorrWordListIter; + + public: + WordSearchStatus( + const SvxAutocorrWord& aFnd, + const SvxAutocorrWordList* pAutocorrWordList, + const SvxAutocorrWordList::Iterator& autocorrWordListIter + ) : mFnd(aFnd), mpAutocorrWordList(pAutocorrWordList), + mAutocorrWordListIter(autocorrWordListIter) + {} + const SvxAutocorrWord* GetAutocorrWord() const { + return &mFnd; + } + const SvxAutocorrWordList* GetAutocorrWordList() const { + return mpAutocorrWordList; + } + const SvxAutocorrWord* GetWordAtIter() const { + return &*mAutocorrWordListIter; + } + bool StepIter() { + return mAutocorrWordListIter.Step(); + } + }; + SvxAutocorrWordList(); // 
free any objects still in the set ~SvxAutocorrWordList(); void DeleteAndDestroyAll(); + bool ContainsPattern(const OUString& sShort) const; const SvxAutocorrWord* Insert(SvxAutocorrWord aWord) const; std::optional<SvxAutocorrWord> FindAndRemove(const SvxAutocorrWord *pWord); void LoadEntry(const OUString& sWrong, const OUString& sRight, bool bOnlyTxt); @@ -181,7 +228,10 @@ public: typedef std::vector<SvxAutocorrWord> AutocorrWordSetType; const AutocorrWordSetType & getSortedContent() const; - const SvxAutocorrWord* SearchWordsInList(std::u16string_view rTxt, sal_Int32& rStt, sal_Int32 nEndPos) const; + std::optional<WordSearchStatus> + SearchWordsInList(std::u16string_view rTxt, sal_Int32& rStt, sal_Int32 nEndPos) const; + bool + SearchWordsNext(std::u16string_view rTxt, sal_Int32& rStt, sal_Int32 nEndPos, WordSearchStatus& rStatus) const; }; class EDITENG_DLLPUBLIC SvxAutoCorrectLanguageLists @@ -323,10 +373,14 @@ public: // nEnd - to check position - as of this item forward // rLang - Input: in which language is searched // Output: in which "language list" was it found - const SvxAutocorrWord* SearchWordsInList( std::u16string_view rTxt, - sal_Int32& rStt, sal_Int32 nEndPos, - SvxAutoCorrDoc& rDoc, - LanguageTag& rLang ); + std::optional<SvxAutocorrWordList::WordSearchStatus> + SearchWordsInList( std::u16string_view rTxt, + sal_Int32& rStt, sal_Int32 nEndPos, + SvxAutoCorrDoc& rDoc, + LanguageTag& rLang ); + static bool SearchWordsNext( std::u16string_view rTxt, + sal_Int32& rStt, sal_Int32 nEndPos, + SvxAutocorrWordList::WordSearchStatus& rStatus ); // Query/Set the Character for the Quote substitution sal_Unicode GetStartSingleQuote() const { return cStartSQuote; } diff --git a/sw/qa/extras/uiwriter/data/tdf158454.odt b/sw/qa/extras/uiwriter/data/tdf158454.odt new file mode 100644 index 000000000000..13e31b64882f Binary files /dev/null and b/sw/qa/extras/uiwriter/data/tdf158454.odt differ diff --git a/sw/qa/extras/uiwriter/uiwriter6.cxx 
b/sw/qa/extras/uiwriter/uiwriter6.cxx index a9c7ecc9e176..6729e081ab19 100644 --- a/sw/qa/extras/uiwriter/uiwriter6.cxx +++ b/sw/qa/extras/uiwriter/uiwriter6.cxx @@ -3758,6 +3758,45 @@ CPPUNIT_TEST_FIXTURE(SwUiWriterTest6, testTdf155407) } } +CPPUNIT_TEST_FIXTURE(SwUiWriterTest6, testTdf158454) +{ + createSwDoc("tdf158454.odt"); + SwDoc* pDoc = getSwDoc(); + SwWrtShell* pWrtShell = pDoc->GetDocShell()->GetWrtShell(); + CPPUNIT_ASSERT(pWrtShell); + SwXTextDocument* pTextDoc = dynamic_cast<SwXTextDocument*>(mxComponent.get()); + + // * without change tracking + CPPUNIT_ASSERT(!(pWrtShell->GetRedlineFlags() & RedlineFlags::On)); + + // Thai single autocorrect (อนุญาติ -> อนุญาต) + emulateTyping(*pTextDoc, u"อนุญาติ "); + OUString sReplaced = u"อนุญาต (จบ)"_ustr; + CPPUNIT_ASSERT_EQUAL(sReplaced, getParagraph(1)->getString()); + + // Thai multiple autocorrects (กงศุลสังเกตุกระทันหัน -> กงสุลสังเกตกะทันหัน) + emulateTyping(*pTextDoc, u"กงศุลสังเกตุกระทันหัน "); + sReplaced = u"อนุญาต กงสุลสังเกตกะทันหัน (จบ)"_ustr; + CPPUNIT_ASSERT_EQUAL(sReplaced, getParagraph(1)->getString()); + + // * with change tracking (showing redlines) + RedlineFlags const nMode(pWrtShell->GetRedlineFlags() | RedlineFlags::On); + CPPUNIT_ASSERT(nMode & (RedlineFlags::ShowDelete | RedlineFlags::ShowInsert)); + pWrtShell->SetRedlineFlags(nMode); + CPPUNIT_ASSERT(nMode & RedlineFlags::On); + CPPUNIT_ASSERT(nMode & RedlineFlags::ShowDelete); + + // Thai single autocorrect (อนุญาติ -> อนุญาต) + emulateTyping(*pTextDoc, u"อนุญาติ "); + sReplaced = u"อนุญาต กงสุลสังเกตกะทันหัน อนุญาต (จบ)"_ustr; + CPPUNIT_ASSERT_EQUAL(sReplaced, getParagraph(1)->getString()); + + // Thai multiple autocorrects (กงศุลสังเกตุกระทันหัน -> กงสุลสังเกตกะทันหัน) + emulateTyping(*pTextDoc, u"กงศุลสังเกตุกระทันหัน "); + sReplaced = u"อนุญาต กงสุลสังเกตกะทันหัน อนุญาต กงสุลสังเกตกะทันหัน (จบ)"_ustr; + CPPUNIT_ASSERT_EQUAL(sReplaced, getParagraph(1)->getString()); +} + CPPUNIT_PLUGIN_IMPLEMENT(); /* vim:set shiftwidth=4 
softtabstop=4 expandtab: */ diff --git a/sw/source/core/edit/acorrect.cxx b/sw/source/core/edit/acorrect.cxx index 6f112291bbef..71bc334be04d 100644 --- a/sw/source/core/edit/acorrect.cxx +++ b/sw/source/core/edit/acorrect.cxx @@ -393,110 +393,118 @@ bool SwAutoCorrDoc::ChgAutoCorrWord( sal_Int32& rSttPos, sal_Int32 nEndPos, pTextNd->getLayoutFrame(m_rEditSh.GetLayout()))); assert(pFrame); - const OUString sFrameText = pFrame->GetText(); - const SvxAutocorrWord* pFnd = rACorrect.SearchWordsInList( - sFrameText, rSttPos, nEndPos, *this, aLanguageTag); + OUString sFrameText = pFrame->GetText(); + sal_Int32 sttPos = rSttPos; + auto pStatus = rACorrect.SearchWordsInList(sFrameText, sttPos, nEndPos, + *this, aLanguageTag); + SwDoc* pDoc = m_rEditSh.GetDoc(); - if( pFnd ) + if( pStatus ) { - // replace also last colon of keywords surrounded by colons (for example, ":name:") - const bool replaceLastChar = sFrameText.getLength() > nEndPos && pFnd->GetShort()[0] == ':' - && pFnd->GetShort().endsWith(":"); - - SwPosition aStartPos( pFrame->MapViewToModelPos(TextFrameIndex(rSttPos) )); - SwPosition aEndPos( pFrame->MapViewToModelPos(TextFrameIndex(nEndPos + (replaceLastChar ? 1 : 0))) ); - SwPaM aPam(aStartPos, aEndPos); - - // don't replace, if a redline starts or ends within the original text - if ( pDoc->getIDocumentRedlineAccess().HasRedline( aPam, RedlineType::Any, /*bStartOrEndInRange=*/true ) ) - { - return bRet; - } + sal_Int32 minSttPos = sttPos; + do { + const SvxAutocorrWord* pFnd = pStatus->GetAutocorrWord(); + // replace also last colon of keywords surrounded by colons + // (for example, ":name:") + const bool replaceLastChar = sFrameText.getLength() > nEndPos + && pFnd->GetShort()[0] == ':' + && pFnd->GetShort().endsWith(":"); + + SwPosition aStartPos( pFrame->MapViewToModelPos(TextFrameIndex(sttPos)) ); + SwPosition aEndPos( pFrame->MapViewToModelPos(TextFrameIndex(nEndPos + (replaceLastChar ? 
1 : 0))) ); + SwPaM aPam(aStartPos, aEndPos); + + // don't replace, if a redline starts or ends within the original text + if ( pDoc->getIDocumentRedlineAccess().HasRedline( aPam, RedlineType::Any, /*bStartOrEndInRange=*/true ) ) + { + return bRet; + } - if( pFnd->IsTextOnly() ) - { - //JP 22.04.99: Bug 63883 - Special treatment for dots. - const bool bLastCharIsPoint - = nEndPos < sFrameText.getLength() && ('.' == sFrameText[nEndPos]); - if( !bLastCharIsPoint || pFnd->GetLong().isEmpty() || - '.' != pFnd->GetLong()[ pFnd->GetLong().getLength() - 1 ] ) + if( pFnd->IsTextOnly() ) { - // replace the selection - std::vector<std::shared_ptr<SwUnoCursor>> ranges; - if (sw::GetRanges(ranges, *m_rEditSh.GetDoc(), aPam)) + //JP 22.04.99: Bug 63883 - Special treatment for dots. + const bool bLastCharIsPoint + = nEndPos < sFrameText.getLength() && ('.' == sFrameText[nEndPos]); + if( !bLastCharIsPoint || pFnd->GetLong().isEmpty() || + '.' != pFnd->GetLong()[ pFnd->GetLong().getLength() - 1 ] ) { - pDoc->getIDocumentContentOperations().ReplaceRange(aPam, pFnd->GetLong(), false); - bRet = true; - } - else if (!ranges.empty()) - { - assert(ranges.front()->GetPoint()->GetNode() == ranges.front()->GetMark()->GetNode()); + // replace the selection + std::vector<std::shared_ptr<SwUnoCursor>> ranges; + bool noRedlines = sw::GetRanges(ranges, + *m_rEditSh.GetDoc(), aPam); + OSL_ENSURE(noRedlines, "redlines should have been blocked."); + OSL_ENSURE(ranges.empty(), "no redlines expected here."); + pDoc->getIDocumentContentOperations().ReplaceRange( - *ranges.front(), pFnd->GetLong(), false); - for (auto it = ranges.begin() + 1; it != ranges.end(); ++it) - { - DeleteSelImpl(**it); - } + aPam, pFnd->GetLong(), false); + nEndPos = sttPos + pFnd->GetLong().getLength(); bRet = true; - } - // tdf#83260 After calling sw::DocumentContentOperationsManager::ReplaceRange - // pTextNd may become invalid when change tracking is on and Edit -> Track Changes -> Show == OFF. 
- // ReplaceRange shows changes, this moves deleted nodes from special section to document. - // Then Show mode is disabled again. As a result pTextNd may be invalidated. - pTextNd = m_rCursor.GetPointNode().GetTextNode(); + // tdf#83260 After calling sw::DocumentContentOperationsManager::ReplaceRange + // pTextNd may become invalid when change tracking is on and Edit -> Track Changes -> Show == OFF. + // ReplaceRange shows changes, this moves deleted nodes from special section to document. + // Then Show mode is disabled again. As a result pTextNd may be invalidated. + pTextNd = m_rCursor.GetPointNode().GetTextNode(); + } } - } - else - { - SwTextBlocks aTBlks( rACorrect.GetAutoCorrFileName( aLanguageTag, false, true )); - sal_uInt16 nPos = aTBlks.GetIndex( pFnd->GetShort() ); - if( USHRT_MAX != nPos && aTBlks.BeginGetDoc( nPos ) ) + else { - DeleteSel( aPam ); - pDoc->DontExpandFormat( *aPam.GetPoint() ); - - if( pPara ) + SwTextBlocks aTBlks( rACorrect.GetAutoCorrFileName( aLanguageTag, false, true )); + sal_uInt16 nPos = aTBlks.GetIndex( pFnd->GetShort() ); + if( USHRT_MAX != nPos && aTBlks.BeginGetDoc( nPos ) ) { - OSL_ENSURE( !m_oIndex, "who has not deleted his Index?" ); - m_oIndex.emplace(m_rCursor.GetPoint()->GetNode()); - sw::GotoPrevLayoutTextFrame(*m_oIndex, m_rEditSh.GetLayout()); - } + DeleteSel( aPam ); + pDoc->DontExpandFormat( *aPam.GetPoint() ); - SwDoc* pAutoDoc = aTBlks.GetDoc(); - SwNodeIndex aSttIdx( pAutoDoc->GetNodes().GetEndOfExtras(), 1 ); - SwContentNode* pContentNd = SwNodes::GoNext(&aSttIdx); - SwPaM aCpyPam( aSttIdx ); + if( pPara ) + { + OSL_ENSURE( !m_oIndex, "who has not deleted his Index?" 
); + m_oIndex.emplace(m_rCursor.GetPoint()->GetNode()); + sw::GotoPrevLayoutTextFrame(*m_oIndex, m_rEditSh.GetLayout()); + } - const SwTableNode* pTableNd = pContentNd->FindTableNode(); - if( pTableNd ) - { - aCpyPam.GetPoint()->Assign( *pTableNd ); - } - aCpyPam.SetMark(); + SwDoc* pAutoDoc = aTBlks.GetDoc(); + SwNodeIndex aSttIdx( pAutoDoc->GetNodes().GetEndOfExtras(), 1 ); + SwContentNode* pContentNd = SwNodes::GoNext(&aSttIdx); + SwPaM aCpyPam( aSttIdx ); + + const SwTableNode* pTableNd = pContentNd->FindTableNode(); + if( pTableNd ) + { + aCpyPam.GetPoint()->Assign( *pTableNd ); + } + aCpyPam.SetMark(); - // then until the end of the Nodes Array - aCpyPam.GetPoint()->Assign( pAutoDoc->GetNodes().GetEndOfContent(), SwNodeOffset(-1) ); - pContentNd = aCpyPam.GetPointContentNode(); - if (pContentNd) - aCpyPam.GetPoint()->SetContent( pContentNd->Len() ); + // then until the end of the Nodes Array + aCpyPam.GetPoint()->Assign( pAutoDoc->GetNodes().GetEndOfContent(), SwNodeOffset(-1) ); + pContentNd = aCpyPam.GetPointContentNode(); + if (pContentNd) + aCpyPam.GetPoint()->SetContent( pContentNd->Len() ); - SwDontExpandItem aExpItem; - aExpItem.SaveDontExpandItems( *aPam.GetPoint() ); + SwDontExpandItem aExpItem; + aExpItem.SaveDontExpandItems( *aPam.GetPoint() ); - pAutoDoc->getIDocumentContentOperations().CopyRange(aCpyPam, *aPam.GetPoint(), SwCopyFlags::CheckPosInFly); + pAutoDoc->getIDocumentContentOperations().CopyRange(aCpyPam, *aPam.GetPoint(), SwCopyFlags::CheckPosInFly); - aExpItem.RestoreDontExpandItems( *aPam.GetPoint() ); + aExpItem.RestoreDontExpandItems( *aPam.GetPoint() ); - if( pPara ) - { - sw::GotoNextLayoutTextFrame(*m_oIndex, m_rEditSh.GetLayout()); - pTextNd = m_oIndex->GetNode().GetTextNode(); + if( pPara ) + { + sw::GotoNextLayoutTextFrame(*m_oIndex, m_rEditSh.GetLayout()); + pTextNd = m_oIndex->GetNode().GetTextNode(); + } + bRet = true; } - bRet = true; + aTBlks.EndGetDoc(); } - aTBlks.EndGetDoc(); - } + if( sttPos < minSttPos) { + minSttPos = 
sttPos; + } + sttPos = rSttPos; + sFrameText = pFrame->GetText(); + } while( SvxAutoCorrect::SearchWordsNext(sFrameText, sttPos, nEndPos, + *pStatus) ); + rSttPos = minSttPos; } if( bRet && pPara && pTextNd )