sw/qa/extras/ooxmlexport/data/tdf126287.docx |binary sw/qa/extras/ooxmlexport/ooxmlexport17.cxx | 5 ++ writerfilter/source/dmapper/DomainMapper.cxx | 44 ++++++++++++++++++++++ writerfilter/source/dmapper/DomainMapper_Impl.cxx | 15 ++++++- writerfilter/source/dmapper/DomainMapper_Impl.hxx | 4 +- 5 files changed, 65 insertions(+), 3 deletions(-)
New commits: commit 2b1826a0078fc43b0f0f9078561cec7df1dd92df Author: Vasily Melenchuk <vasily.melenc...@cib.de> AuthorDate: Fri Jul 30 17:22:38 2021 +0300 Commit: Xisco Fauli <xiscofa...@libreoffice.org> CommitDate: Wed Feb 9 10:15:52 2022 +0100 tdf#126287: docx import: use deferred linebreak In some cases, when we have a combination of a linebreak, an end of paragraph and an end of section at the very bottom of a page, MS Word ignores the linebreak to avoid an empty page with the same section and a lonely end-of-paragraph mark. With a deferred linebreak we can simulate this and ignore such linebreaks during import. Change-Id: Ie6ee4c0990ee1c2e853691a76953c83191664e43 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/119709 Tested-by: Jenkins Reviewed-by: Thorsten Behrens <thorsten.behr...@allotropia.de> Signed-off-by: Xisco Fauli <xiscofa...@libreoffice.org> Reviewed-on: https://gerrit.libreoffice.org/c/core/+/129661 diff --git a/sw/qa/extras/ooxmlexport/data/tdf126287.docx b/sw/qa/extras/ooxmlexport/data/tdf126287.docx new file mode 100644 index 000000000000..cf2250b9f9a4 Binary files /dev/null and b/sw/qa/extras/ooxmlexport/data/tdf126287.docx differ diff --git a/sw/qa/extras/ooxmlexport/ooxmlexport17.cxx b/sw/qa/extras/ooxmlexport/ooxmlexport17.cxx index af90d8697de7..962574d871b1 100644 --- a/sw/qa/extras/ooxmlexport/ooxmlexport17.cxx +++ b/sw/qa/extras/ooxmlexport/ooxmlexport17.cxx @@ -107,6 +107,11 @@ CPPUNIT_TEST_FIXTURE(Test, testDontAddNewStyles) assertXPath(pXmlDoc, "/w:styles/w:style[@w:styleId='Caption']", 0); } +DECLARE_OOXMLEXPORT_TEST(testTdf126287, "tdf126287.docx") +{ + CPPUNIT_ASSERT_EQUAL(2, getPages()); +} + DECLARE_OOXMLEXPORT_TEST(testTdf123642_BookmarkAtDocEnd, "tdf123642.docx") { // get bookmark interface diff --git a/writerfilter/source/dmapper/DomainMapper.cxx b/writerfilter/source/dmapper/DomainMapper.cxx index 33db1c2cef4e..ffa829351baa 100644 --- a/writerfilter/source/dmapper/DomainMapper.cxx +++ b/writerfilter/source/dmapper/DomainMapper.cxx @@ -3225,6 +3225,18 @@ void 
DomainMapper::lcl_startParagraphGroup() void DomainMapper::lcl_endParagraphGroup() { + if (m_pImpl->isBreakDeferred(LINE_BREAK)) + { + if (m_pImpl->GetIsLastParagraphInSection()) + m_pImpl->clearDeferredBreak(LINE_BREAK); + + while (m_pImpl->isBreakDeferred(LINE_BREAK)) + { + m_pImpl->clearDeferredBreak(LINE_BREAK); + m_pImpl->appendTextPortion("\n", m_pImpl->GetTopContext()); + } + } + m_pImpl->PopProperties(CONTEXT_PARAGRAPH); if (m_pImpl->hasTableManager()) m_pImpl->getTableManager().endParagraphGroup(); @@ -3374,6 +3386,13 @@ void DomainMapper::lcl_text(const sal_uInt8 * data_, size_t len) case 0x0e: //column break m_pImpl->deferBreak(COLUMN_BREAK); return; + case 0x0a: //line break + if (m_pImpl->GetIsLastParagraphInSection()) + { + m_pImpl->deferBreak(LINE_BREAK); + return; + } + break; case 0x07: m_pImpl->getTableManager().text(data_, len); return; @@ -3406,6 +3425,13 @@ void DomainMapper::lcl_text(const sal_uInt8 * data_, size_t len) // GetTopContext() is changed by inserted breaks, but we want to keep the current context PropertyMapPtr pContext = m_pImpl->GetTopContext(); + + while (m_pImpl->isBreakDeferred(LINE_BREAK)) + { + m_pImpl->clearDeferredBreak(LINE_BREAK); + m_pImpl->appendTextPortion("\n", pContext); + } + if (!m_pImpl->GetFootnoteContext()) { if (m_pImpl->isBreakDeferred(PAGE_BREAK)) @@ -3629,6 +3655,18 @@ void DomainMapper::lcl_utext(const sal_uInt8 * data_, size_t len) m_pImpl->m_bHasFtnSep = true; return; } + else if (len == 1 && sText[0] == '\r') + { + // Clear "last" one linebreak at end of section + if (m_pImpl->GetIsLastParagraphInSection() && m_pImpl->isBreakDeferred(LINE_BREAK)) + m_pImpl->clearDeferredBreak(LINE_BREAK); + // And emit all other linebreaks + while (m_pImpl->isBreakDeferred(LINE_BREAK)) + { + m_pImpl->clearDeferredBreak(LINE_BREAK); + m_pImpl->appendTextPortion("\n", m_pImpl->GetTopContext()); + } + } else if (len == 1 && sText[0] == '\t' ) { if ( m_pImpl->m_bCheckFirstFootnoteTab && m_pImpl->IsInFootOrEndnote() ) @@ 
-3659,6 +3697,12 @@ void DomainMapper::lcl_utext(const sal_uInt8 * data_, size_t len) try { + while (m_pImpl->isBreakDeferred(LINE_BREAK)) + { + m_pImpl->clearDeferredBreak(LINE_BREAK); + m_pImpl->appendTextPortion("\n", m_pImpl->GetTopContext()); + } + m_pImpl->getTableManager().utext(data_, len); if (bNewLine) diff --git a/writerfilter/source/dmapper/DomainMapper_Impl.cxx b/writerfilter/source/dmapper/DomainMapper_Impl.cxx index 35b9964ef73d..1a7fbfda6b9b 100644 --- a/writerfilter/source/dmapper/DomainMapper_Impl.cxx +++ b/writerfilter/source/dmapper/DomainMapper_Impl.cxx @@ -286,6 +286,7 @@ DomainMapper_Impl::DomainMapper_Impl( m_bIsFirstSection( true ), m_bIsColumnBreakDeferred( false ), m_bIsPageBreakDeferred( false ), + m_nLineBreaksDeferred( 0 ), m_bSdtEndDeferred(false), m_bParaSdtEndDeferred(false), m_bStartTOC(false), @@ -1113,9 +1114,12 @@ void DomainMapper_Impl::deferBreak( BreakType deferredBreakType) { switch (deferredBreakType) { - case COLUMN_BREAK: - m_bIsColumnBreakDeferred = true; + case LINE_BREAK: + m_nLineBreaksDeferred++; break; + case COLUMN_BREAK: + m_bIsColumnBreakDeferred = true; + break; case PAGE_BREAK: // See SwWW8ImplReader::HandlePageBreakChar(), page break should be // ignored inside tables. 
@@ -1133,6 +1137,8 @@ bool DomainMapper_Impl::isBreakDeferred( BreakType deferredBreakType ) { switch (deferredBreakType) { + case LINE_BREAK: + return m_nLineBreaksDeferred > 0; case COLUMN_BREAK: return m_bIsColumnBreakDeferred; case PAGE_BREAK: @@ -1146,6 +1152,10 @@ void DomainMapper_Impl::clearDeferredBreak(BreakType deferredBreakType) { switch (deferredBreakType) { + case LINE_BREAK: + assert(m_nLineBreaksDeferred > 0); + m_nLineBreaksDeferred--; + break; case COLUMN_BREAK: m_bIsColumnBreakDeferred = false; break; @@ -1159,6 +1169,7 @@ void DomainMapper_Impl::clearDeferredBreak(BreakType deferredBreakType) void DomainMapper_Impl::clearDeferredBreaks() { + m_nLineBreaksDeferred = 0; m_bIsColumnBreakDeferred = false; m_bIsPageBreakDeferred = false; } diff --git a/writerfilter/source/dmapper/DomainMapper_Impl.hxx b/writerfilter/source/dmapper/DomainMapper_Impl.hxx index 9733c4a2cac7..bb37c556ebc6 100644 --- a/writerfilter/source/dmapper/DomainMapper_Impl.hxx +++ b/writerfilter/source/dmapper/DomainMapper_Impl.hxx @@ -112,7 +112,8 @@ enum { NUMBER_OF_CONTEXTS = CONTEXT_LIST + 1 }; enum BreakType { PAGE_BREAK, - COLUMN_BREAK + COLUMN_BREAK, + LINE_BREAK }; /** @@ -476,6 +477,7 @@ private: bool m_bIsFirstSection; bool m_bIsColumnBreakDeferred; bool m_bIsPageBreakDeferred; + sal_Int32 m_nLineBreaksDeferred; /// If we want to set "sdt end" on the next character context. bool m_bSdtEndDeferred; /// If we want to set "paragraph sdt end" on the next paragraph context.