sw/inc/IDocumentMarkAccess.hxx | 14 ++++++++++++++ sw/source/core/doc/docbm.cxx | 30 +++++++++++++++++++++++++----- sw/source/core/inc/MarkManager.hxx | 9 ++++++++- sw/source/core/unocore/unobkm.cxx | 5 ----- sw/source/filter/basflt/shellio.cxx | 6 ++++++ 5 files changed, 53 insertions(+), 11 deletions(-)
New commits: commit 55f087b48b7b8e6341a0e9abf70695b574689a3e Author: Mike Kaganski <mike.kagan...@collabora.com> AuthorDate: Thu May 15 09:55:49 2025 +0500 Commit: Mike Kaganski <mike.kagan...@collabora.com> CommitDate: Thu May 15 09:03:04 2025 +0200 tdf#165918: speed up mark names uniqueness check MarkManager::makeMark call can be quite expensive, when it needs to ensure that the passed name is unique. It needs to iterate over all the existing marks in m_vAllMarks, checking their names. At import time, this creates quadratic complexity, even when the passed names are already all unique. This change introduces a dedicated performance mode, when names are not deduplicated in load time. When the mode is ended, an optimized loop is performed, using a set of names for much faster uniqueness check. Depending on the number of bookmarks, I saw the improvement of the test documents loading times up to 40% (like 190 s -> 115 s; of course, realistic documents would have more modest improvements). Change-Id: I4d624ea6cd7268e7bcfdefecac6d3f1bb58edc28 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/185340 Tested-by: Jenkins Reviewed-by: Mike Kaganski <mike.kagan...@collabora.com> diff --git a/sw/inc/IDocumentMarkAccess.hxx b/sw/inc/IDocumentMarkAccess.hxx index 68d39db2fd60..1575232c5eab 100644 --- a/sw/inc/IDocumentMarkAccess.hxx +++ b/sw/inc/IDocumentMarkAccess.hxx @@ -96,6 +96,20 @@ class IDocumentMarkAccess const SwPaM& rPaM, const SwMarkName& rName ) = 0; + /** A performance optimization mode + + Creating and inserting a lot of marks, the checks in makeMark if name is unique may + become a bottleneck, because there we have to iterate over all marks, checking their + names, which creates a quadratic complexity. This may e.g. slow down loading documents + with thousands of bookmarks. + + When the check is disabled using disableUniqueNameChecks, duplicate names are allowed. + When the check is eventually enabled using enableUniqueNameChecks, one pass over all + marks is performed, and all duplicated names are made unique. + */ + virtual void disableUniqueNameChecks() = 0; + virtual void enableUniqueNameChecks() = 0; + /** Returns a mark in the document for a paragraph. If there is none, a mark will be created. diff --git a/sw/source/core/doc/docbm.cxx b/sw/source/core/doc/docbm.cxx index 135dbc561bba..4a17c6e198db 100644 --- a/sw/source/core/doc/docbm.cxx +++ b/sw/source/core/doc/docbm.cxx @@ -500,6 +500,22 @@ namespace sw::mark , m_pLastActiveFieldmark(nullptr) { } + void MarkManager::disableUniqueNameChecks() { m_bCheckUniqueNames = false; } + void MarkManager::enableUniqueNameChecks() + { + if (m_bCheckUniqueNames) + return; + // Make sure that all names are unique + std::unordered_set<OUString> usedNames; + for (auto& pMark : m_vAllMarks) + { + assert(pMark); + pMark->SetName(getUniqueMarkName(pMark->GetName(), [&usedNames](const SwMarkName& n) + { return usedNames.insert(n.toString()).second; })); + } + m_bCheckUniqueNames = true; + } + ::sw::mark::MarkBase* MarkManager::makeMark(const SwPaM& rPaM, const SwMarkName& rName, const IDocumentMarkAccess::MarkType eType, @@ -616,8 +632,10 @@ namespace sw::mark pMark->Swap(); // for performance reasons, we trust UnoMarks to have a (generated) unique name - if ( eType != IDocumentMarkAccess::MarkType::UNO_BOOKMARK ) - pMark->SetName( getUniqueMarkName( pMark->GetName() ) ); + if (eType != IDocumentMarkAccess::MarkType::UNO_BOOKMARK && m_bCheckUniqueNames) + pMark->SetName(getUniqueMarkName( + pMark->GetName(), [this](const SwMarkName& n) + { return lcl_FindMarkByName(n, m_vAllMarks) == m_vAllMarks.end(); })); // insert any dummy chars before inserting into sorted vectors pMark->InitDoc(m_rDoc, eMode, pSepPos); @@ -1760,7 +1778,9 @@ namespace sw::mark } } - SwMarkName MarkManager::getUniqueMarkName(const SwMarkName& rName) const + template <class IsNameUniqueFunc> + requires std::is_invocable_r_v<bool, IsNameUniqueFunc, const SwMarkName&> + SwMarkName MarkManager::getUniqueMarkName(const SwMarkName& rName, IsNameUniqueFunc f) const { OSL_ENSURE(rName.toString().getLength(), "<MarkManager::getUniqueMarkName(..)> - a name should be proposed"); @@ -1772,7 +1792,7 @@ namespace sw::mark return SwMarkName(newName); } - if (lcl_FindMarkByName(rName, m_vAllMarks) == m_vAllMarks.end()) + if (f(rName)) { return rName; } @@ -1790,7 +1810,7 @@ namespace sw::mark { sTmp = aPrefix + OUString::number(nCnt); nCnt++; - if (lcl_FindMarkByName(SwMarkName(sTmp), m_vAllMarks) == m_vAllMarks.end()) + if (f(SwMarkName(sTmp))) { break; } diff --git a/sw/source/core/inc/MarkManager.hxx b/sw/source/core/inc/MarkManager.hxx index cccea7084f65..f4ee394d3f5b 100644 --- a/sw/source/core/inc/MarkManager.hxx +++ b/sw/source/core/inc/MarkManager.hxx @@ -57,6 +57,9 @@ namespace sw::mark { const SwPaM& rPaM, const SwMarkName& rName ) override; + virtual void disableUniqueNameChecks() override; + virtual void enableUniqueNameChecks() override; + virtual void repositionMark(::sw::mark::MarkBase* io_pMark, const SwPaM& rPaM) override; virtual bool renameMark(::sw::mark::MarkBase* io_pMark, const SwMarkName& rNewName) override; virtual void correctMarksAbsolute(const SwNode& rOldNode, const SwPosition& rNewPos, const sal_Int32 nOffset) override; @@ -140,7 +143,9 @@ namespace sw::mark { MarkManager& operator=(MarkManager const&) = delete; // make names - SwMarkName getUniqueMarkName(const SwMarkName& rName) const; + template <class IsNameUniqueFunc> + requires std::is_invocable_r_v<bool, IsNameUniqueFunc, const SwMarkName&> + SwMarkName getUniqueMarkName(const SwMarkName& rName, IsNameUniqueFunc f) const; void sortSubsetMarks(); void sortMarks(); @@ -161,6 +166,8 @@ namespace sw::mark { SwDoc& m_rDoc; sw::mark::FieldmarkWithDropDownButton* m_pLastActiveFieldmark; + + bool m_bCheckUniqueNames = true; }; } diff --git a/sw/source/core/unocore/unobkm.cxx b/sw/source/core/unocore/unobkm.cxx index 60ad2ec8394c..f66494e164b4 100644 --- a/sw/source/core/unocore/unobkm.cxx +++ b/sw/source/core/unocore/unobkm.cxx @@ -161,11 +161,6 @@ rtl::Reference<SwXBookmark> SwXBookmark::CreateXBookmark( } if (!xBookmark.is()) { - OSL_ENSURE(!pBookmark || - dynamic_cast< ::sw::mark::Bookmark* >(pBookmark) || - IDocumentMarkAccess::GetType(*pBookmark) == IDocumentMarkAccess::MarkType::ANNOTATIONMARK, - "<SwXBookmark::GetObject(..)>" - "SwXBookmark requested for non-bookmark mark and non-annotation mark."); xBookmark = pBookmark ? new SwXBookmark(&rDoc) : new SwXBookmark; xBookmark->m_pImpl->registerInMark(*xBookmark, pBookmark); } diff --git a/sw/source/filter/basflt/shellio.cxx b/sw/source/filter/basflt/shellio.cxx index 88cef06127e7..abb516422b88 100644 --- a/sw/source/filter/basflt/shellio.cxx +++ b/sw/source/filter/basflt/shellio.cxx @@ -201,8 +201,14 @@ ErrCodeMsg SwReader::Read( const Reader& rOptions ) mxDoc->getIDocumentRedlineAccess().SetRedlineFlags_intern( eOld ); + // Preformance mode: import all bookmarks names as defined in the document + mxDoc->getIDocumentMarkAccess()->disableUniqueNameChecks(); + nError = po->Read( *mxDoc, msBaseURL, *pPam, maFileName ); + // End performance mode: now make sure that all names are unique + mxDoc->getIDocumentMarkAccess()->enableUniqueNameChecks(); + // an ODF document may contain redline mode in settings.xml; save it! ePostReadRedlineFlags = mxDoc->getIDocumentRedlineAccess().GetRedlineFlags();