sw/qa/extras/rtfexport/rtfexport8.cxx | 29 +++++++++++++++-------- writerfilter/source/rtftok/rtfdispatchsymbol.cxx | 11 +++----- writerfilter/source/rtftok/rtfdocumentimpl.cxx | 18 ++++++++++++-- writerfilter/source/rtftok/rtfdocumentimpl.hxx | 2 + 4 files changed, 42 insertions(+), 18 deletions(-)
New commits: commit 0d66c56d48476efdf8d9d9d9549f59cdad5cb25d Author: Michael Stahl <michael.st...@allotropia.de> AuthorDate: Tue Jan 30 20:03:45 2024 +0100 Commit: Christian Lohmaier <lohmaier+libreoff...@googlemail.com> CommitDate: Thu Feb 1 12:43:41 2024 +0100 tdf#158586 writerfilter: RTF import: handle \sect in frame as \par This fixes the test testTdf158586_0 and testTdf158586_0B to look like in Word; the case appears a bit esoteric, hopefully Word won't actually create such documents? But Word will round-trip such bugdoc to a DOCX where the first w:p contains all of w:framePr and w:sectPr and w:br... Change-Id: I6ec09478a774e1e9c785e9482618c1afc388df0e Reviewed-on: https://gerrit.libreoffice.org/c/core/+/162778 Tested-by: Jenkins Reviewed-by: Michael Stahl <michael.st...@allotropia.de> (cherry picked from commit dbe78489e98d565b72a703524308523135ffdd67) Reviewed-on: https://gerrit.libreoffice.org/c/core/+/162847 Reviewed-by: Christian Lohmaier <lohmaier+libreoff...@googlemail.com> diff --git a/sw/qa/extras/rtfexport/rtfexport8.cxx b/sw/qa/extras/rtfexport/rtfexport8.cxx index 41dec0a64695..70dd0974c616 100644 --- a/sw/qa/extras/rtfexport/rtfexport8.cxx +++ b/sw/qa/extras/rtfexport/rtfexport8.cxx @@ -49,17 +49,24 @@ DECLARE_RTFEXPORT_TEST(testTdf158586_0, "tdf158586_pageBreak0.rtf") { // The specified page break must be lost because it is in a text frame CPPUNIT_ASSERT_EQUAL(1, getPages()); - // CPPUNIT_ASSERT_EQUAL(1, getParagraphs()); + CPPUNIT_ASSERT_EQUAL(1, getParagraphs()); - // There should be no empty carriage return at the start of the second page - // const auto& pLayout = parseLayoutDump(); - // assertXPathContent(pLayout, "//page[1]/body/txt"_ostr, "First page");} + // There should be no empty paragraph at the start + const auto& pLayout = parseLayoutDump(); + assertXPath(pLayout, "//anchored", 1); + assertXPathContent(pLayout, "/root/page[1]/body//txt", "First page"); } DECLARE_RTFEXPORT_TEST(testTdf158586_0B, "tdf158586_pageBreak0B.rtf") { // The specified page break must be lost because it is in a text frame CPPUNIT_ASSERT_EQUAL(1, getPages()); + CPPUNIT_ASSERT_EQUAL(1, getParagraphs()); + + // There should be no empty paragraph at the start + const auto& pLayout = parseLayoutDump(); + assertXPath(pLayout, "//anchored", 1); + assertXPathContent(pLayout, "/root/page[1]/body//txt", "First page"); } DECLARE_RTFEXPORT_TEST(testTdf158586_1, "tdf158586_pageBreak1.rtf") diff --git a/writerfilter/source/rtftok/rtfdispatchsymbol.cxx b/writerfilter/source/rtftok/rtfdispatchsymbol.cxx index 334d2a557b0b..c7eee5b0f4b1 100644 --- a/writerfilter/source/rtftok/rtfdispatchsymbol.cxx +++ b/writerfilter/source/rtftok/rtfdispatchsymbol.cxx @@ -132,7 +132,7 @@ RTFError RTFDocumentImpl::dispatchSymbol(RTFKeyword nKeyword) case RTFKeyword::SECT: { m_bHadSect = true; - if (m_bIgnoreNextContSectBreak) + if (m_bIgnoreNextContSectBreak || m_aStates.top().getFrame().hasProperties()) { // testContSectionPageBreak: need \par now dispatchSymbol(RTFKeyword::PAR); commit e63a75e00dcd797b0bd061add3c72dcb7b353007 Author: Michael Stahl <michael.st...@allotropia.de> AuthorDate: Tue Jan 30 17:46:12 2024 +0100 Commit: Christian Lohmaier <lohmaier+libreoff...@googlemail.com> CommitDate: Thu Feb 1 12:43:32 2024 +0100 tdf#158586 writerfilter: RTF import: fix \page \sect \skbnone The problem is that \page is actually completely ignored in the bugdoc testTdf158586_1. If you delete the \sbknone then there is a page break but it's caused by \sect; the \page is still ignored. It is ignored because, first, the \page results in a deferred break in DomainMapper, then for \sect, the synthetic \par is dispatched and that moves the break from deferred to the top paragraph properties context, then sectBreak() calls endParagraphGroup() which just removes the top paragraph properties context. Remove the dispatchSymbol(RTFKeyword::PAR) for \sect, instead set a flag so that RTFDocumentImpl::sectBreak() does it. Add a new flag m_bParAtEndOfSection so that RTFDocumentImpl::parBreak() can suppress the startParagraphGroup(), so the deferred break remains present. This also fixes testTdf158586_lostFrame. (regression from commit 15b886f460919ea3dce425a621dc017c2992a96b) Change-Id: I82a00899a9448069832a0b2f98a96df00da75518 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/162770 Tested-by: Jenkins Reviewed-by: Michael Stahl <michael.st...@allotropia.de> (cherry picked from commit 57abad5cf990111fd7de011809d4421dc0550193) Reviewed-on: https://gerrit.libreoffice.org/c/core/+/162846 Reviewed-by: Christian Lohmaier <lohmaier+libreoff...@googlemail.com> diff --git a/sw/qa/extras/rtfexport/rtfexport8.cxx b/sw/qa/extras/rtfexport/rtfexport8.cxx index 71c727e1511c..41dec0a64695 100644 --- a/sw/qa/extras/rtfexport/rtfexport8.cxx +++ b/sw/qa/extras/rtfexport/rtfexport8.cxx @@ -65,12 +65,14 @@ DECLARE_RTFEXPORT_TEST(testTdf158586_0B, "tdf158586_pageBreak0B.rtf") DECLARE_RTFEXPORT_TEST(testTdf158586_1, "tdf158586_pageBreak1.rtf") { // None of the specified text frame settings initiates a real text frame - page break not lost - // CPPUNIT_ASSERT_EQUAL(2, getPages()); - // CPPUNIT_ASSERT_EQUAL(2, getParagraphs()); + CPPUNIT_ASSERT_EQUAL(2, getPages()); + CPPUNIT_ASSERT_EQUAL(2, getParagraphs()); // There should be no empty carriage return at the start of the second page - // const auto& pLayout = parseLayoutDump(); - // assertXPathContent(pLayout, "//page[2]/body/txt"_ostr, "Second page"); + const auto& pLayout = parseLayoutDump(); + // on import there is a section on page 2; on reimport there is no section + // (probably not an important difference?) + assertXPathContent(pLayout, "/root/page[2]/body//txt", "Second page"); } DECLARE_RTFEXPORT_TEST(testTdf158586_lostFrame, "tdf158586_lostFrame.rtf") @@ -79,9 +81,9 @@ DECLARE_RTFEXPORT_TEST(testTdf158586_lostFrame, "tdf158586_lostFrame.rtf") const auto& pLayout = parseLayoutDump(); assertXPath(pLayout, "//anchored", 1); assertXPathContent(pLayout, "//page[1]/body//txt", "1st page"); - // assertXPathContent(pLayout, "//page[2]/body//txt"_ostr, "2nd page"); + assertXPathContent(pLayout, "//page[2]/body//txt", "2nd page"); - // CPPUNIT_ASSERT_EQUAL(2, getPages()); + CPPUNIT_ASSERT_EQUAL(2, getPages()); } CPPUNIT_PLUGIN_IMPLEMENT(); diff --git a/writerfilter/source/rtftok/rtfdispatchsymbol.cxx b/writerfilter/source/rtftok/rtfdispatchsymbol.cxx index d5b196790a50..334d2a557b0b 100644 --- a/writerfilter/source/rtftok/rtfdispatchsymbol.cxx +++ b/writerfilter/source/rtftok/rtfdispatchsymbol.cxx @@ -131,9 +131,6 @@ RTFError RTFDocumentImpl::dispatchSymbol(RTFKeyword nKeyword) break; case RTFKeyword::SECT: { - if (m_bNeedCr) - dispatchSymbol(RTFKeyword::PAR); - m_bHadSect = true; if (m_bIgnoreNextContSectBreak) { @@ -143,6 +140,10 @@ RTFError RTFDocumentImpl::dispatchSymbol(RTFKeyword nKeyword) } else { + if (m_bNeedCr) + { // tdf#158586 don't dispatch \par here, it eats deferred page breaks + setNeedPar(true); + } sectBreak(); if (m_nResetBreakOnSectBreak != RTFKeyword::invalid) { @@ -409,8 +410,6 @@ RTFError RTFDocumentImpl::dispatchSymbol(RTFKeyword nKeyword) } sal_uInt8 const sBreak[] = { 0xc }; Mapper().text(sBreak, 1); - // testFdo81892 don't do another \par break directly; because of - // GetSplitPgBreakAndParaMark() it does finishParagraph *twice* m_bNeedCr = true; } } diff --git a/writerfilter/source/rtftok/rtfdocumentimpl.cxx b/writerfilter/source/rtftok/rtfdocumentimpl.cxx index d196993f9c60..91b2df21ba21 100644 --- a/writerfilter/source/rtftok/rtfdocumentimpl.cxx +++ b/writerfilter/source/rtftok/rtfdocumentimpl.cxx @@ -672,7 +672,10 @@ void RTFDocumentImpl::parBreak() m_bHadPicture = false; // start new one - Mapper().startParagraphGroup(); + if (!m_bParAtEndOfSection) + { + Mapper().startParagraphGroup(); + } } void RTFDocumentImpl::sectBreak(bool bFinal) @@ -686,14 +689,26 @@ void RTFDocumentImpl::sectBreak(bool bFinal) // unless this is the end of the doc, we had nothing since the last section break and this is not a continuous one. // Also, when pasting, it's fine to not have any paragraph inside the document at all. if (m_bNeedPar && (!bFinal || m_bNeedSect || bContinuous) && !isSubstream() && m_bIsNewDoc) + { + m_bParAtEndOfSection = true; dispatchSymbol(RTFKeyword::PAR); + } // It's allowed to not have a non-table paragraph at the end of an RTF doc, add it now if required. if (m_bNeedFinalPar && bFinal) { dispatchFlag(RTFKeyword::PARD); + m_bParAtEndOfSection = true; dispatchSymbol(RTFKeyword::PAR); m_bNeedSect = bNeedSect; } + // testTdf148515, if RTF ends with ow, endParagraphGroup() must be called! + if (!m_bParAtEndOfSection || m_aStates.top().getCurrentBuffer()) + { + Mapper().endParagraphGroup(); // < top para context dies with page break + } + m_bParAtEndOfSection = false; + // paragraph properties are *done* now - only section properties following + while (!m_nHeaderFooterPositions.empty()) { std::pair<Id, std::size_t> aPair = m_nHeaderFooterPositions.front(); @@ -726,7 +741,6 @@ void RTFDocumentImpl::sectBreak(bool bFinal) // The trick is that we send properties of the previous section right now, which will be exactly what dmapper expects. Mapper().props(pProperties); - Mapper().endParagraphGroup(); // End Section if (!m_pSuperstream) diff --git a/writerfilter/source/rtftok/rtfdocumentimpl.hxx b/writerfilter/source/rtftok/rtfdocumentimpl.hxx index 47fedc431bfb..f96c8ada6453 100644 --- a/writerfilter/source/rtftok/rtfdocumentimpl.hxx +++ b/writerfilter/source/rtftok/rtfdocumentimpl.hxx @@ -853,6 +853,8 @@ private: bool m_bNeedPar; /// If set, an empty paragraph will be added at the end of the document. bool m_bNeedFinalPar; + /// a synthetic \par was dispatched at the end of the current section + bool m_bParAtEndOfSection = false; /// The list table and list override table combined. RTFSprms m_aListTableSprms; /// Maps between listoverridetable and listtable indexes.