writerfilter/CppunitTest_writerfilter_ooxml.mk | 54 ++++++++++ writerfilter/Module_writerfilter.mk | 1 writerfilter/qa/cppunittests/ooxml/data/floattable-tables-lost.docx |binary writerfilter/qa/cppunittests/ooxml/ooxml.cxx | 48 ++++++++ writerfilter/source/ooxml/OOXMLFastContextHandler.cxx | 27 +++++ writerfilter/source/ooxml/OOXMLParserState.cxx | 10 + writerfilter/source/ooxml/OOXMLParserState.hxx | 9 + 7 files changed, 149 insertions(+)
New commits: commit fc53a8a2726b68c0e756ce54049a99f87872ca5e Author: Miklos Vajna <vmik...@collabora.com> AuthorDate: Mon Jun 5 08:57:05 2023 +0200 Commit: Miklos Vajna <vmik...@collabora.com> CommitDate: Wed Jun 7 08:18:38 2023 +0200 sw floattable: fix lost tables around a floating table from DOCX The DOCX file has 3 floating tables: the first one is immediately followed by a second one, and the second one also has an inner table. The problem is that somehow dmapper thinks it's fine to merge the two outer table together, than the inner table invalidates some of its cell start/end references, so at the end the outer table conversion fails, which means we only have 1 table, not 3. This is a bit similar to commit 1c99616f86f7d5b83b91edc225fc95fec227d710 (sw floattable, crashtesting: fix PDF export of forum-mso-en3-26783.docx, 2023-05-02), but here fixing the problem at the dmapper level sounded too complex. Instead inject the text node that'll serve as an anchor point at a tokenizer level. With this, the DOCX version of the bnc#816603 bugdoc (the one that started all workarounds in 2013) has one less overlapping table, but it still has problems. (cherry picked from commit 01ad8ec4bb5425446e95dbada81de435646824b4) Change-Id: I54307b4c554da51bb55287e61bca700d8343d11c Reviewed-on: https://gerrit.libreoffice.org/c/core/+/152642 Tested-by: Jenkins CollaboraOffice <jenkinscollaboraoff...@gmail.com> Reviewed-by: Miklos Vajna <vmik...@collabora.com> diff --git a/writerfilter/CppunitTest_writerfilter_ooxml.mk b/writerfilter/CppunitTest_writerfilter_ooxml.mk new file mode 100644 index 000000000000..8d5d214f2470 --- /dev/null +++ b/writerfilter/CppunitTest_writerfilter_ooxml.mk @@ -0,0 +1,54 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +#************************************************************************* +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# +#************************************************************************* + +$(eval $(call gb_CppunitTest_CppunitTest,writerfilter_ooxml)) + +$(eval $(call gb_CppunitTest_use_externals,writerfilter_ooxml,\ + boost_headers \ +)) + +$(eval $(call gb_CppunitTest_add_exception_objects,writerfilter_ooxml, \ + writerfilter/qa/cppunittests/ooxml/ooxml \ +)) + +$(eval $(call gb_CppunitTest_use_libraries,writerfilter_ooxml, \ + basegfx \ + comphelper \ + cppu \ + cppuhelper \ + oox \ + sal \ + subsequenttest \ + test \ + unotest \ + utl \ + tl \ + vcl \ +)) + +$(eval $(call gb_CppunitTest_use_sdk_api,writerfilter_ooxml)) + +$(eval $(call gb_CppunitTest_use_ure,writerfilter_ooxml)) +$(eval $(call gb_CppunitTest_use_vcl,writerfilter_ooxml)) + +$(eval $(call gb_CppunitTest_use_rdb,writerfilter_ooxml,services)) + +$(eval $(call gb_CppunitTest_use_custom_headers,writerfilter_ooxml,\ + officecfg/registry \ +)) + +$(eval $(call gb_CppunitTest_use_configuration,writerfilter_ooxml)) + +# we need to explicitly depend on library writerfilter because it is not implied +# by a link relation +$(call gb_CppunitTest_get_target,writerfilter_ooxml) : $(call gb_Library_get_target,writerfilter) + +# vim: set noet sw=4 ts=4: diff --git a/writerfilter/Module_writerfilter.mk b/writerfilter/Module_writerfilter.mk index 46d7af0ff63a..587bf4fa4dfc 100644 --- a/writerfilter/Module_writerfilter.mk +++ b/writerfilter/Module_writerfilter.mk @@ -18,6 +18,7 @@ $(eval $(call gb_Module_add_slowcheck_targets,writerfilter,\ CppunitTest_writerfilter_filters_test \ CppunitTest_writerfilter_misc \ CppunitTest_writerfilter_dmapper \ + CppunitTest_writerfilter_ooxml \ CppunitTest_writerfilter_rtftok \ )) diff --git a/writerfilter/qa/cppunittests/ooxml/data/floattable-tables-lost.docx b/writerfilter/qa/cppunittests/ooxml/data/floattable-tables-lost.docx new file mode 100644 index 000000000000..5d1d9624c8a5 Binary files /dev/null and b/writerfilter/qa/cppunittests/ooxml/data/floattable-tables-lost.docx differ diff --git a/writerfilter/qa/cppunittests/ooxml/ooxml.cxx b/writerfilter/qa/cppunittests/ooxml/ooxml.cxx new file mode 100644 index 000000000000..fb790252becb --- /dev/null +++ b/writerfilter/qa/cppunittests/ooxml/ooxml.cxx @@ -0,0 +1,48 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#include <test/unoapi_test.hxx> + +#include <com/sun/star/text/XTextTablesSupplier.hpp> + +using namespace ::com::sun::star; + +namespace +{ +/// Tests for writerfilter/source/ooxml/. +class Test : public UnoApiTest +{ +public: + Test() + : UnoApiTest("/writerfilter/qa/cppunittests/ooxml/data/") + { + } +}; + +CPPUNIT_TEST_FIXTURE(Test, testFloatingTablesLost) +{ + // Given a document with 2 floating tables, the 2nd has an inner floating table as well: + loadFromURL(u"floattable-tables-lost.docx"); + + // When counting the created Writer tables: + uno::Reference<text::XTextTablesSupplier> xTextDocument(mxComponent, uno::UNO_QUERY); + uno::Reference<container::XIndexAccess> xTables(xTextDocument->getTextTables(), uno::UNO_QUERY); + + // Then make sure that all 3 tables are imported: + // Without the accompanying fix in place, this test would have failed with: + // - Expected: 3 + // - Actual : 1 + // i.e. only the inner table was imported, the 2 others were lost. + CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(3), xTables->getCount()); +} +} + +CPPUNIT_PLUGIN_IMPLEMENT(); + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/writerfilter/source/ooxml/OOXMLFastContextHandler.cxx b/writerfilter/source/ooxml/OOXMLFastContextHandler.cxx index da7642bf0fc8..51eee79e81a3 100644 --- a/writerfilter/source/ooxml/OOXMLFastContextHandler.cxx +++ b/writerfilter/source/ooxml/OOXMLFastContextHandler.cxx @@ -413,6 +413,11 @@ void OOXMLFastContextHandler::startParagraphGroup() if (!isForwardEvents()) return; + if (mpParserState->GetFloatingTableEnded()) + { + mpParserState->SetFloatingTableEnded(false); + } + if (mpParserState->isInParagraphGroup()) endParagraphGroup(); @@ -1613,6 +1618,14 @@ void OOXMLFastContextHandlerTextTable::lcl_startFastElement (Token_t /*Element*/, const uno::Reference< xml::sax::XFastAttributeList > & /*Attribs*/) { + if (mpParserState->GetFloatingTableEnded()) + { + // We're starting a new table, but the previous table was floating. Insert a dummy paragraph + // to ensure that the floating table has a suitable anchor. + startParagraphGroup(); + endOfParagraph(); + } + mpParserState->startTable(); mnTableDepth++; @@ -1639,6 +1652,20 @@ void OOXMLFastContextHandlerTextTable::lcl_endFastElement mpParserState->setCharacterProperties(pProps); mnTableDepth--; + + OOXMLPropertySet::Pointer_t pTableProps = mpParserState->GetTableProperties(); + if (pTableProps) + { + for (const auto& rTableProp : *pTableProps) + { + if (rTableProp->getId() == NS_ooxml::LN_CT_TblPrBase_tblpPr) + { + mpParserState->SetFloatingTableEnded(true); + break; + } + } + } + mpParserState->endTable(); } diff --git a/writerfilter/source/ooxml/OOXMLParserState.cxx b/writerfilter/source/ooxml/OOXMLParserState.cxx index a11afde8fb37..7d972ae29dfc 100644 --- a/writerfilter/source/ooxml/OOXMLParserState.cxx +++ b/writerfilter/source/ooxml/OOXMLParserState.cxx @@ -207,6 +207,16 @@ void OOXMLParserState::setTableProperties(const OOXMLPropertySet::Pointer_t& pPr } } +OOXMLPropertySet::Pointer_t OOXMLParserState::GetTableProperties() const +{ + if (mTableProps.empty()) + { + return nullptr; + } + + return mTableProps.top(); +} + // tdf#108714 void OOXMLParserState::resolvePostponedBreak(Stream & rStream) { diff --git a/writerfilter/source/ooxml/OOXMLParserState.hxx b/writerfilter/source/ooxml/OOXMLParserState.hxx index 5bb9a7dc7900..ca5c60e7f9e7 100644 --- a/writerfilter/source/ooxml/OOXMLParserState.hxx +++ b/writerfilter/source/ooxml/OOXMLParserState.hxx @@ -58,6 +58,8 @@ class OOXMLParserState final : public virtual SvRefBase std::vector<SavedAlternateState> maSavedAlternateStates; std::vector<OOXMLPropertySet::Pointer_t> mvPostponedBreaks; bool mbStartFootnote; + /// We just ended a floating table. Starting a paragraph or table resets this. + bool m_bFloatingTableEnded = false; public: typedef tools::SvRef<OOXMLParserState> Pointer_t; @@ -101,6 +103,7 @@ public: void setRowProperties(const OOXMLPropertySet::Pointer_t& pProps); void resolveTableProperties(Stream& rStream); void setTableProperties(const OOXMLPropertySet::Pointer_t& pProps); + OOXMLPropertySet::Pointer_t GetTableProperties() const; // tdf#108714 void resolvePostponedBreak(Stream& rStream); void setPostponedBreak(const OOXMLPropertySet::Pointer_t& pProps); @@ -115,6 +118,12 @@ public: void setStartFootnote(bool bStartFootnote); bool isStartFootnote() const { return mbStartFootnote; } + + void SetFloatingTableEnded(bool bFloatingTableEnded) + { + m_bFloatingTableEnded = bFloatingTableEnded; + } + bool GetFloatingTableEnded() const { return m_bFloatingTableEnded; } }; }