sw/inc/IDocumentMarkAccess.hxx | 14 ++++++ sw/qa/extras/unowriter/data/bookmark_1.html | 4 + sw/qa/extras/unowriter/unowriter.cxx | 37 +++++++++++++++++ sw/source/core/doc/docbm.cxx | 58 +++++++++++++++++++++++++--- sw/source/core/inc/MarkManager.hxx | 14 ++++++ sw/source/core/unocore/unobkm.cxx | 5 -- sw/source/filter/basflt/shellio.cxx | 10 ++++ 7 files changed, 130 insertions(+), 12 deletions(-)
New commits: commit 4a97035ec911e238dccc77a67afc52081c4fbded Author: Mike Kaganski <mike.kagan...@collabora.com> AuthorDate: Thu May 15 17:45:55 2025 +0500 Commit: Mike Kaganski <mike.kagan...@collabora.com> CommitDate: Fri May 16 17:52:45 2025 +0500 Related: tdf#165918 Avoid renaming pre-existing bookmarks Thanks Michael Stahl, for pointing this out: > what happens when you first insert a bookmark, then insert a file that > contains a bookmark with the same name at a place in the document before > the pre-existing one - the SwHistoryBookmark uses the name of the bookmark > to find it, and this loop would rename the pre-existing bookmark, so on > undo of insert bookmark it can't be found? ( https://gerrit.libreoffice.org/c/core/+/185340/comment/274a3fba_aa363535/ ) This change avoids this problem by preparing a set of already existing names at the start of the performance mode, and only checking the marks that were added while in the performance mode. Change-Id: Id4dc1809871fd4a998396d091f2f920ecd71b7d9 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/185363 Tested-by: Jenkins Reviewed-by: Michael Stahl <michael.st...@allotropia.de> Reviewed-by: Mike Kaganski <mike.kagan...@collabora.com> diff --git a/sw/qa/extras/unowriter/data/bookmark_1.html b/sw/qa/extras/unowriter/data/bookmark_1.html new file mode 100644 index 000000000000..225817dc6729 --- /dev/null +++ b/sw/qa/extras/unowriter/data/bookmark_1.html @@ -0,0 +1,4 @@ +<html> +<body><a name="Bookmark 1">abc</a> +</body> +</html> \ No newline at end of file diff --git a/sw/qa/extras/unowriter/unowriter.cxx b/sw/qa/extras/unowriter/unowriter.cxx index 565d1b6d9433..c02233e3382c 100644 --- a/sw/qa/extras/unowriter/unowriter.cxx +++ b/sw/qa/extras/unowriter/unowriter.cxx @@ -16,6 +16,7 @@ #include <com/sun/star/frame/XDispatchProviderInterception.hpp> #include <com/sun/star/frame/XDispatchProviderInterceptor.hpp> #include <com/sun/star/table/XCellRange.hpp> +#include <com/sun/star/text/ControlCharacter.hpp> #include 
<com/sun/star/text/TextContentAnchorType.hpp> #include <com/sun/star/text/AutoTextContainer.hpp> #include <com/sun/star/text/VertOrientation.hpp> @@ -1353,6 +1354,42 @@ CPPUNIT_TEST_FIXTURE(SwUnoWriter, testTdf164885) CPPUNIT_ASSERT_EQUAL(u".uno:Open"_ustr, interceptor->pDispatch->sLastCommand); } +CPPUNIT_TEST_FIXTURE(SwUnoWriter, testMarkWithPreexistingNameInsertion) +{ + createSwDoc(); + + // Add a second paragraph, and create a bookmark there, with a specific name + auto xTextDocument = mxComponent.queryThrow<text::XTextDocument>(); + auto xText = xTextDocument->getText(); + xText->insertControlCharacter(xText->getEnd(), text::ControlCharacter::PARAGRAPH_BREAK, false); + + auto xFac = mxComponent.queryThrow<lang::XMultiServiceFactory>(); + auto xMark = xFac->createInstance(u"com.sun.star.text.Bookmark"_ustr); + auto xNamed = xMark.queryThrow<container::XNamed>(); + xNamed->setName(u"Bookmark 1"_ustr); + xText->insertTextContent(xText->getEnd(), xMark.queryThrow<text::XTextContent>(), false); + + // Insert the content of a file, which has a bookmark with the same name, before existing one + dispatchCommand( + mxComponent, u".uno:InsertDoc"_ustr, + { comphelper::makePropertyValue(u"Name"_ustr, createFileURL(u"bookmark_1.html")) }); + + // The pre-existing bookmark's name must not change + // Before the fix, this would fail with "Actual : Bookmark 1 Copy 1" + CPPUNIT_ASSERT_EQUAL(u"Bookmark 1"_ustr, xNamed->getName()); + + auto xSupplier = mxComponent.queryThrow<text::XBookmarksSupplier>(); + auto xBookmarks = xSupplier->getBookmarks(); + auto names = xBookmarks->getElementNames(); + CPPUNIT_ASSERT_EQUAL(sal_Int32(2), names.getLength()); + // names[1] is the pre-existing bookmark + CPPUNIT_ASSERT_EQUAL(u"Bookmark 1"_ustr, names[1]); + // names[0] is the bookmark coming from the inserted file + OUString rest; + CPPUNIT_ASSERT(names[0].startsWith("Bookmark 1", &rest)); + CPPUNIT_ASSERT(!rest.isEmpty()); // should be " Copy 1" +} + CPPUNIT_PLUGIN_IMPLEMENT(); /* 
vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sw/source/core/doc/docbm.cxx b/sw/source/core/doc/docbm.cxx index d51e76725e1c..57729c703cfd 100644 --- a/sw/source/core/doc/docbm.cxx +++ b/sw/source/core/doc/docbm.cxx @@ -565,19 +565,37 @@ namespace sw::mark , m_pLastActiveFieldmark(nullptr) { } - void MarkManager::disableUniqueNameChecks() { m_bCheckUniqueNames = false; } + // In the mode where the names are not checked, we need to avoid a case where there was a + // bookmark, and a file is inserted at an earlier point, with the same-name bookmark, causing + // a rename of the pre-existing bookmark. m_aUsedNames and m_vUncheckedNameMarks are used for + // that. m_aUsedNames is pre-populated with the existing names (at the moment when the mode + // started); and m_vUncheckedNameMarks stores only the marks needing the checks. + + void MarkManager::disableUniqueNameChecks() + { + if (!m_bCheckUniqueNames) + return; // nested call + m_bCheckUniqueNames = false; + assert(m_aUsedNames.empty()); + + // Populate the pre-existing already deduplicated names + for (auto& pMark : m_vAllMarks) + m_aUsedNames.insert(pMark->GetName()); + } void MarkManager::enableUniqueNameChecks() { if (m_bCheckUniqueNames) return; - // Make sure that all names are unique - std::unordered_set<OUString> usedNames; + // Make sure that all previously unchecked names are unique for (auto& pMark : m_vAllMarks) { - assert(pMark); - pMark->SetName(getUniqueMarkName(pMark->GetName(), [&usedNames](const OUString& n) - { return usedNames.insert(n).second; })); + if (!m_vUncheckedNameMarks.contains(pMark)) + continue; // mark was added and removed while in the performance mode + pMark->SetName(getUniqueMarkName(pMark->GetName(), [this](const OUString& n) + { return m_aUsedNames.insert(n).second; })); } + m_aUsedNames.clear(); + m_vUncheckedNameMarks.clear(); m_bCheckUniqueNames = true; } @@ -697,10 +715,19 @@ namespace sw::mark pMark->Swap(); // for performance reasons, we trust UnoMarks to 
have a (generated) unique name - if (eType != IDocumentMarkAccess::MarkType::UNO_BOOKMARK && m_bCheckUniqueNames) - pMark->SetName(getUniqueMarkName( - pMark->GetName(), [this](const OUString& n) - { return lcl_FindMarkByName(n, m_vAllMarks.begin(), m_vAllMarks.end()) == m_vAllMarks.end(); })); + if (eType != IDocumentMarkAccess::MarkType::UNO_BOOKMARK) + { + if (m_bCheckUniqueNames) + { + pMark->SetName(getUniqueMarkName( + pMark->GetName(), [this](const OUString& n) + { return lcl_FindMarkByName(n, m_vAllMarks.begin(), m_vAllMarks.end()) == m_vAllMarks.end(); })); + } + else + { + m_vUncheckedNameMarks.insert(pMark.get()); + } + } // insert any dummy chars before inserting into sorted vectors pMark->InitDoc(m_rDoc, eMode, pSepPos); @@ -1411,6 +1438,7 @@ namespace sw::mark m_vFieldmarks.clear(); m_vBookmarks.clear(); m_vAnnotationMarks.clear(); + m_vUncheckedNameMarks.clear(); for (const auto & p : m_vAllMarks) delete p; m_vAllMarks.clear(); diff --git a/sw/source/core/inc/MarkManager.hxx b/sw/source/core/inc/MarkManager.hxx index 2f8c3aecff65..bb03ee530666 100644 --- a/sw/source/core/inc/MarkManager.hxx +++ b/sw/source/core/inc/MarkManager.hxx @@ -152,6 +152,11 @@ namespace sw::mark { // container for all marks, this container owns the objects it points to container_t m_vAllMarks; + // container for all marks with possibly duplicating names (m_bCheckUniqueNames mode) + std::unordered_set<sw::mark::MarkBase*> m_vUncheckedNameMarks; + // container for deduplicating names (m_bCheckUniqueNames mode) + std::unordered_set<OUString> m_aUsedNames; + // additional container for bookmarks container_t m_vBookmarks; // additional container for fieldmarks diff --git a/sw/source/filter/basflt/shellio.cxx b/sw/source/filter/basflt/shellio.cxx index 98ac9340618b..48e494b33b0d 100644 --- a/sw/source/filter/basflt/shellio.cxx +++ b/sw/source/filter/basflt/shellio.cxx @@ -18,6 +18,7 @@ */ #include <hintids.hxx> +#include <comphelper/scopeguard.hxx> #include <osl/diagnose.h> 
#include <tools/date.hxx> #include <tools/time.hxx> @@ -201,13 +202,14 @@ ErrCodeMsg SwReader::Read( const Reader& rOptions ) mxDoc->getIDocumentRedlineAccess().SetRedlineFlags_intern( eOld ); - // Preformance mode: import all bookmarks names as defined in the document - mxDoc->getIDocumentMarkAccess()->disableUniqueNameChecks(); - - nError = po->Read( *mxDoc, msBaseURL, *pPam, maFileName ); + { + // Preformance mode: import all bookmarks names as defined in the document + mxDoc->getIDocumentMarkAccess()->disableUniqueNameChecks(); + comphelper::ScopeGuard perfModeGuard( + [this]() { mxDoc->getIDocumentMarkAccess()->enableUniqueNameChecks(); }); - // End performance mode: now make sure that all names are unique - mxDoc->getIDocumentMarkAccess()->enableUniqueNameChecks(); + nError = po->Read(*mxDoc, msBaseURL, *pPam, maFileName); + } // an ODF document may contain redline mode in settings.xml; save it! ePostReadRedlineFlags = mxDoc->getIDocumentRedlineAccess().GetRedlineFlags(); commit ba70e3a851f951273849ef148d62c7dac84b2b55 Author: Mike Kaganski <mike.kagan...@collabora.com> AuthorDate: Thu May 15 09:55:49 2025 +0500 Commit: Mike Kaganski <mike.kagan...@collabora.com> CommitDate: Thu May 15 13:04:31 2025 +0500 tdf#165918: speed up mark names uniqueness check A MarkManager::makeMark call can be quite expensive when it needs to ensure that the passed name is unique. It needs to iterate over all the existing marks in m_vAllMarks, checking their names. At import time, this creates quadratic complexity, even when the passed names are already all unique. This change introduces a dedicated performance mode, in which names are not deduplicated at load time. When the mode ends, an optimized loop is performed, using a set of names for a much faster uniqueness check. Depending on the number of bookmarks, I saw the loading times of the test documents improve by up to 40% (e.g. 190 s -> 115 s; of course, realistic documents would have more modest improvements). 
Change-Id: I4d624ea6cd7268e7bcfdefecac6d3f1bb58edc28 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/185340 Tested-by: Jenkins Reviewed-by: Mike Kaganski <mike.kagan...@collabora.com> diff --git a/sw/inc/IDocumentMarkAccess.hxx b/sw/inc/IDocumentMarkAccess.hxx index bc8f2e1c03d6..2fdaf39fbbb6 100644 --- a/sw/inc/IDocumentMarkAccess.hxx +++ b/sw/inc/IDocumentMarkAccess.hxx @@ -146,6 +146,20 @@ class IDocumentMarkAccess const SwPaM& rPaM, const OUString& rName ) = 0; + /** A performance optimization mode + + Creating and inserting a lot of marks, the checks in makeMark if name is unique may + become a bottleneck, because there we have to iterate over all marks, checking their + names, which creates a quadratic complexity. This may e.g. slow down loading documents + with thousands of bookmarks. + + When the check is disabled using disableUniqueNameChecks, duplicate names are allowed. + When the check is eventually enabled using enableUniqueNameChecks, one pass over all + marks is performed, and all duplicated names are made unique. + */ + virtual void disableUniqueNameChecks() = 0; + virtual void enableUniqueNameChecks() = 0; + /** Returns a mark in the document for a paragraph. If there is none, a mark will be created. 
diff --git a/sw/source/core/doc/docbm.cxx b/sw/source/core/doc/docbm.cxx index 730da32625d4..d51e76725e1c 100644 --- a/sw/source/core/doc/docbm.cxx +++ b/sw/source/core/doc/docbm.cxx @@ -565,6 +565,22 @@ namespace sw::mark , m_pLastActiveFieldmark(nullptr) { } + void MarkManager::disableUniqueNameChecks() { m_bCheckUniqueNames = false; } + void MarkManager::enableUniqueNameChecks() + { + if (m_bCheckUniqueNames) + return; + // Make sure that all names are unique + std::unordered_set<OUString> usedNames; + for (auto& pMark : m_vAllMarks) + { + assert(pMark); + pMark->SetName(getUniqueMarkName(pMark->GetName(), [&usedNames](const OUString& n) + { return usedNames.insert(n).second; })); + } + m_bCheckUniqueNames = true; + } + ::sw::mark::IMark* MarkManager::makeMark(const SwPaM& rPaM, const OUString& rName, const IDocumentMarkAccess::MarkType eType, @@ -681,8 +697,10 @@ namespace sw::mark pMark->Swap(); // for performance reasons, we trust UnoMarks to have a (generated) unique name - if ( eType != IDocumentMarkAccess::MarkType::UNO_BOOKMARK ) - pMark->SetName( getUniqueMarkName( pMark->GetName() ) ); + if (eType != IDocumentMarkAccess::MarkType::UNO_BOOKMARK && m_bCheckUniqueNames) + pMark->SetName(getUniqueMarkName( + pMark->GetName(), [this](const OUString& n) + { return lcl_FindMarkByName(n, m_vAllMarks.begin(), m_vAllMarks.end()) == m_vAllMarks.end(); })); // insert any dummy chars before inserting into sorted vectors pMark->InitDoc(m_rDoc, eMode, pSepPos); @@ -1800,7 +1818,9 @@ namespace sw::mark } } - OUString MarkManager::getUniqueMarkName(const OUString& rName) const + template <class IsNameUniqueFunc> + requires std::is_invocable_r_v<bool, IsNameUniqueFunc, const OUString&> + OUString MarkManager::getUniqueMarkName(const OUString& rName, IsNameUniqueFunc f) const { OSL_ENSURE(rName.getLength(), "<MarkManager::getUniqueMarkName(..)> - a name should be proposed"); @@ -1812,7 +1832,7 @@ namespace sw::mark return newName; } - if (lcl_FindMarkByName(rName, 
m_vAllMarks.begin(), m_vAllMarks.end()) == m_vAllMarks.end()) + if (f(rName)) { return rName; } @@ -1830,7 +1850,7 @@ namespace sw::mark { sTmp = aPrefix + OUString::number(nCnt); nCnt++; - if (lcl_FindMarkByName(sTmp, m_vAllMarks.begin(), m_vAllMarks.end()) == m_vAllMarks.end()) + if (f(sTmp)) { break; } diff --git a/sw/source/core/inc/MarkManager.hxx b/sw/source/core/inc/MarkManager.hxx index dd7eb9f6f18b..2f8c3aecff65 100644 --- a/sw/source/core/inc/MarkManager.hxx +++ b/sw/source/core/inc/MarkManager.hxx @@ -58,6 +58,9 @@ namespace sw::mark { const SwPaM& rPaM, const OUString& rName ) override; + virtual void disableUniqueNameChecks() override; + virtual void enableUniqueNameChecks() override; + virtual void repositionMark(::sw::mark::IMark* io_pMark, const SwPaM& rPaM) override; virtual bool renameMark(::sw::mark::IMark* io_pMark, const OUString& rNewName) override; virtual void correctMarksAbsolute(const SwNode& rOldNode, const SwPosition& rNewPos, const sal_Int32 nOffset) override; @@ -139,7 +142,9 @@ namespace sw::mark { MarkManager& operator=(MarkManager const&) = delete; // make names - OUString getUniqueMarkName(const OUString& rName) const; + template <class IsNameUniqueFunc> + requires std::is_invocable_r_v<bool, IsNameUniqueFunc, const OUString&> + OUString getUniqueMarkName(const OUString& rName, IsNameUniqueFunc f) const; void sortSubsetMarks(); void sortMarks(); @@ -160,6 +165,8 @@ namespace sw::mark { SwDoc& m_rDoc; sw::mark::FieldmarkWithDropDownButton* m_pLastActiveFieldmark; + + bool m_bCheckUniqueNames = true; }; } diff --git a/sw/source/core/unocore/unobkm.cxx b/sw/source/core/unocore/unobkm.cxx index 81818a13d03a..e7cfdf8e3843 100644 --- a/sw/source/core/unocore/unobkm.cxx +++ b/sw/source/core/unocore/unobkm.cxx @@ -168,11 +168,6 @@ rtl::Reference<SwXBookmark> SwXBookmark::CreateXBookmark( } if (!xBookmark.is()) { - OSL_ENSURE(!pBookmark || - dynamic_cast< ::sw::mark::IBookmark* >(pBookmark) || - IDocumentMarkAccess::GetType(*pBookmark) == 
IDocumentMarkAccess::MarkType::ANNOTATIONMARK, - "<SwXBookmark::GetObject(..)>" - "SwXBookmark requested for non-bookmark mark and non-annotation mark."); SwXBookmark *const pXBookmark = pBookmark ? new SwXBookmark(&rDoc) : new SwXBookmark; xBookmark.set(pXBookmark); diff --git a/sw/source/filter/basflt/shellio.cxx b/sw/source/filter/basflt/shellio.cxx index a092b8e61a9a..98ac9340618b 100644 --- a/sw/source/filter/basflt/shellio.cxx +++ b/sw/source/filter/basflt/shellio.cxx @@ -201,8 +201,14 @@ ErrCodeMsg SwReader::Read( const Reader& rOptions ) mxDoc->getIDocumentRedlineAccess().SetRedlineFlags_intern( eOld ); + // Preformance mode: import all bookmarks names as defined in the document + mxDoc->getIDocumentMarkAccess()->disableUniqueNameChecks(); + nError = po->Read( *mxDoc, msBaseURL, *pPam, maFileName ); + // End performance mode: now make sure that all names are unique + mxDoc->getIDocumentMarkAccess()->enableUniqueNameChecks(); + // an ODF document may contain redline mode in settings.xml; save it! ePostReadRedlineFlags = mxDoc->getIDocumentRedlineAccess().GetRedlineFlags();