sw/qa/extras/ooxmlexport/data/tdf126287.docx |binary sw/qa/extras/ooxmlexport/ooxmlexport17.cxx | 5 ++ writerfilter/source/dmapper/DomainMapper.cxx | 44 ++++++++++++++++++++++ writerfilter/source/dmapper/DomainMapper_Impl.cxx | 15 ++++++- writerfilter/source/dmapper/DomainMapper_Impl.hxx | 4 +- 5 files changed, 65 insertions(+), 3 deletions(-)
New commits: commit 19bca24486315cc35f873486e6a2dd18394d0614 Author: Vasily Melenchuk <vasily.melenc...@cib.de> AuthorDate: Fri Jul 30 17:22:38 2021 +0300 Commit: Thorsten Behrens <thorsten.behr...@allotropia.de> CommitDate: Mon Feb 7 21:20:15 2022 +0100 tdf#126287: docx import: use deferred linebreak In some cases, when we have a combination of linebreak, end of paragraph and end of section at the very bottom of a page, MS Word ignores the linebreak to avoid an empty page with the same section and a lonely end-of-paragraph mark. With a deferred linebreak we can simulate this and ignore such linebreaks during import. Change-Id: Ie6ee4c0990ee1c2e853691a76953c83191664e43 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/119709 Tested-by: Jenkins Reviewed-by: Thorsten Behrens <thorsten.behr...@allotropia.de> diff --git a/sw/qa/extras/ooxmlexport/data/tdf126287.docx b/sw/qa/extras/ooxmlexport/data/tdf126287.docx new file mode 100644 index 000000000000..cf2250b9f9a4 Binary files /dev/null and b/sw/qa/extras/ooxmlexport/data/tdf126287.docx differ diff --git a/sw/qa/extras/ooxmlexport/ooxmlexport17.cxx b/sw/qa/extras/ooxmlexport/ooxmlexport17.cxx index 7dec733c8717..f3ed7d2127da 100644 --- a/sw/qa/extras/ooxmlexport/ooxmlexport17.cxx +++ b/sw/qa/extras/ooxmlexport/ooxmlexport17.cxx @@ -165,6 +165,11 @@ DECLARE_OOXMLEXPORT_TEST(TestWPGZOrder, "testWPGZOrder.docx") } } +DECLARE_OOXMLEXPORT_TEST(testTdf126287, "tdf126287.docx") +{ + CPPUNIT_ASSERT_EQUAL(2, getPages()); +} + DECLARE_OOXMLEXPORT_TEST(testTdf123642_BookmarkAtDocEnd, "tdf123642.docx") { // get bookmark interface diff --git a/writerfilter/source/dmapper/DomainMapper.cxx b/writerfilter/source/dmapper/DomainMapper.cxx index 6a18d8d12da0..eed7ab8d45b0 100644 --- a/writerfilter/source/dmapper/DomainMapper.cxx +++ b/writerfilter/source/dmapper/DomainMapper.cxx @@ -3225,6 +3225,18 @@ void DomainMapper::lcl_startParagraphGroup() void DomainMapper::lcl_endParagraphGroup() { + if (m_pImpl->isBreakDeferred(LINE_BREAK)) + { + if 
(m_pImpl->GetIsLastParagraphInSection()) + m_pImpl->clearDeferredBreak(LINE_BREAK); + + while (m_pImpl->isBreakDeferred(LINE_BREAK)) + { + m_pImpl->clearDeferredBreak(LINE_BREAK); + m_pImpl->appendTextPortion("\n", m_pImpl->GetTopContext()); + } + } + m_pImpl->PopProperties(CONTEXT_PARAGRAPH); if (m_pImpl->hasTableManager()) m_pImpl->getTableManager().endParagraphGroup(); @@ -3385,6 +3397,13 @@ void DomainMapper::lcl_text(const sal_uInt8 * data_, size_t len) case 0x0e: //column break m_pImpl->deferBreak(COLUMN_BREAK); return; + case 0x0a: //line break + if (m_pImpl->GetIsLastParagraphInSection()) + { + m_pImpl->deferBreak(LINE_BREAK); + return; + } + break; case 0x07: m_pImpl->getTableManager().text(data_, len); return; @@ -3417,6 +3436,13 @@ void DomainMapper::lcl_text(const sal_uInt8 * data_, size_t len) // GetTopContext() is changed by inserted breaks, but we want to keep the current context PropertyMapPtr pContext = m_pImpl->GetTopContext(); + + while (m_pImpl->isBreakDeferred(LINE_BREAK)) + { + m_pImpl->clearDeferredBreak(LINE_BREAK); + m_pImpl->appendTextPortion("\n", pContext); + } + if (!m_pImpl->GetFootnoteContext()) { if (m_pImpl->isBreakDeferred(PAGE_BREAK)) @@ -3640,6 +3666,18 @@ void DomainMapper::lcl_utext(const sal_uInt8 * data_, size_t len) m_pImpl->m_bHasFtnSep = true; return; } + else if (len == 1 && sText[0] == '\r') + { + // Clear "last" one linebreak at end of section + if (m_pImpl->GetIsLastParagraphInSection() && m_pImpl->isBreakDeferred(LINE_BREAK)) + m_pImpl->clearDeferredBreak(LINE_BREAK); + // And emit all other linebreaks + while (m_pImpl->isBreakDeferred(LINE_BREAK)) + { + m_pImpl->clearDeferredBreak(LINE_BREAK); + m_pImpl->appendTextPortion("\n", m_pImpl->GetTopContext()); + } + } else if (len == 1 && sText[0] == '\t' ) { if ( m_pImpl->m_bCheckFirstFootnoteTab && m_pImpl->IsInFootOrEndnote() ) @@ -3670,6 +3708,12 @@ void DomainMapper::lcl_utext(const sal_uInt8 * data_, size_t len) try { + while (m_pImpl->isBreakDeferred(LINE_BREAK)) + 
{ + m_pImpl->clearDeferredBreak(LINE_BREAK); + m_pImpl->appendTextPortion("\n", m_pImpl->GetTopContext()); + } + m_pImpl->getTableManager().utext(data_, len); if (bNewLine) diff --git a/writerfilter/source/dmapper/DomainMapper_Impl.cxx b/writerfilter/source/dmapper/DomainMapper_Impl.cxx index 548425914616..87c954bbe9ca 100644 --- a/writerfilter/source/dmapper/DomainMapper_Impl.cxx +++ b/writerfilter/source/dmapper/DomainMapper_Impl.cxx @@ -286,6 +286,7 @@ DomainMapper_Impl::DomainMapper_Impl( m_bIsFirstSection( true ), m_bIsColumnBreakDeferred( false ), m_bIsPageBreakDeferred( false ), + m_nLineBreaksDeferred( 0 ), m_bSdtEndDeferred(false), m_bParaSdtEndDeferred(false), m_bStartTOC(false), @@ -1114,9 +1115,12 @@ void DomainMapper_Impl::deferBreak( BreakType deferredBreakType) { switch (deferredBreakType) { - case COLUMN_BREAK: - m_bIsColumnBreakDeferred = true; + case LINE_BREAK: + m_nLineBreaksDeferred++; break; + case COLUMN_BREAK: + m_bIsColumnBreakDeferred = true; + break; case PAGE_BREAK: // See SwWW8ImplReader::HandlePageBreakChar(), page break should be // ignored inside tables. 
@@ -1134,6 +1138,8 @@ bool DomainMapper_Impl::isBreakDeferred( BreakType deferredBreakType ) { switch (deferredBreakType) { + case LINE_BREAK: + return m_nLineBreaksDeferred > 0; case COLUMN_BREAK: return m_bIsColumnBreakDeferred; case PAGE_BREAK: @@ -1147,6 +1153,10 @@ void DomainMapper_Impl::clearDeferredBreak(BreakType deferredBreakType) { switch (deferredBreakType) { + case LINE_BREAK: + assert(m_nLineBreaksDeferred > 0); + m_nLineBreaksDeferred--; + break; case COLUMN_BREAK: m_bIsColumnBreakDeferred = false; break; @@ -1160,6 +1170,7 @@ void DomainMapper_Impl::clearDeferredBreak(BreakType deferredBreakType) void DomainMapper_Impl::clearDeferredBreaks() { + m_nLineBreaksDeferred = 0; m_bIsColumnBreakDeferred = false; m_bIsPageBreakDeferred = false; } diff --git a/writerfilter/source/dmapper/DomainMapper_Impl.hxx b/writerfilter/source/dmapper/DomainMapper_Impl.hxx index 7ede5cb2f91d..bbd5d566367e 100644 --- a/writerfilter/source/dmapper/DomainMapper_Impl.hxx +++ b/writerfilter/source/dmapper/DomainMapper_Impl.hxx @@ -114,7 +114,8 @@ enum { NUMBER_OF_CONTEXTS = CONTEXT_LIST + 1 }; enum BreakType { PAGE_BREAK, - COLUMN_BREAK + COLUMN_BREAK, + LINE_BREAK }; /** @@ -478,6 +479,7 @@ private: bool m_bIsFirstSection; bool m_bIsColumnBreakDeferred; bool m_bIsPageBreakDeferred; + sal_Int32 m_nLineBreaksDeferred; /// If we want to set "sdt end" on the next character context. bool m_bSdtEndDeferred; /// If we want to set "paragraph sdt end" on the next paragraph context.