sw/qa/extras/rtfexport/data/page-break-emptyparas-spltpgpar.rtf |   13 +++++
 sw/qa/extras/rtfexport/data/page-break-emptyparas.rtf           |   12 ++++
 sw/qa/extras/rtfexport/rtfexport5.cxx                           |    4 -
 sw/qa/extras/rtfexport/rtfexport6.cxx                           |    7 ++
 sw/qa/extras/rtfexport/rtfexport7.cxx                           |   25 
++++++++++
 sw/qa/extras/rtfimport/rtfimport.cxx                            |    5 +-
 writerfilter/source/dmapper/DomainMapper.cxx                    |    4 +
 writerfilter/source/rtftok/rtfdispatchflag.cxx                  |    7 ++
 writerfilter/source/rtftok/rtfdispatchsymbol.cxx                |   25 
+++++-----
 writerfilter/source/rtftok/rtfdocumentimpl.cxx                  |   16 +++++-
 10 files changed, 96 insertions(+), 22 deletions(-)

New commits:
commit 022e12e9af6e92ac3db853e73b7b2577b3c0511e
Author:     Michael Stahl <michael.st...@allotropia.de>
AuthorDate: Fri Nov 10 17:04:27 2023 +0100
Commit:     Thorsten Behrens <thorsten.behr...@allotropia.de>
CommitDate: Tue Nov 28 21:54:41 2023 +0100

    tdf#153194 writerfilter: RTF import: \spltpgpar
    
     1. Some experimenting with the bugdoc (saving it to DOCX in Word
        changes the layout in Word to exactly what Writer imports from RTF!)
        leads to DOCX w:splitPgBreakAndParaMark setting.
    
     2. the RTF implementation of \spltpgpar was missing; apparently if the
        flag is present the "new" behavior is in effect, which is the
        opposite of how it's specified in RTF Spec 1.9.1.
    
     3. the DomainMapper code that uses this attribute is not in the text()
        function to which RTFDocumentImpl sends paragraph breaks, but in the
        utext() function, so send the break there instead, rather than
        creating even more copypasta.
    
     4. now some filters-text crashes with nullptr pContext in
        DomainMapper::lcl_utext(), avoid that.
    
     5. dispatchSymbol(m_nResetBreakOnSectBreak) doesn't do anything because
        these are handled by dispatchFlag().
    
     6. Test name: testFdo81892::Load_Verify_Reload_Verify
        equality assertion failed
        - Expected: Performance
        - Actual  :
    
        Fails because additional paragraph break inserted after \page; in
        dispatchSymbol() for \sect, remove the parBreak() as already hinted at
        in commit 3c610336a58f644525d5e4d2566c35eee6f7a618
    
     7. rtfimport.cxx:868:Assertion
        Test name: testContSectionPageBreak::TestBody
        equality assertion failed
        - Expected:
        - Actual  : THIRD
    
        It has no paragraph between SECOND and THIRD, whereas Word
        definitely shows a paragraph there.  In dispatchSymbol() for \sect,
        sectBreak() is not called (which may create a paragraph break); in
        m_bIgnoreNextContSectBreak case this needs to be done manually for
        cont-section-pagebreak.rtf to get the empty paragraph between SECOND
        and THIRD.
    
     8. testFdo52052 fails; in dispatchSymbol() for \sect, if the document
        ends with \sect (e.g. fdo52052.rtf) a paragraph break must be
        inserted after this (because DomainMapper unconditionally removes
        the last paragraph break), but not via m_bNeedCr as that creates
        unwanted page break in testNestedTable (m_bNeedCr =>
        dispatchSymbol(\par) => m_bNeedSect => sectBreak()); handle it in
        RTFDocumentImpl::popState() for the end of the document by
        dispatching \par.
    
     9. rtfimport.cxx:1519:Assertion
    
        testTdf108947 now has 1 empty paragraph in the header instead of 2;
        Word also shows only 1 so it's an improvement.
    
    10. Test name: testFdo49893_2::Load_Verify_Reload_Verify
        equality assertion failed
        - Expected: 1
        - Actual  : 0
        - xpath should match exactly 1 node
    
        This was reduced to only 2 pages, while Word shows 5; in
        dispatchSymbol() for \page, for the consecutive \page send an empty
        string to DomainMapper's utext() which causes a paragraph break to
        be created if \spltpgpar isn't set (this was not at all obvious!).
    
    11. testTdf133437 fails with some numbers of flys changing, but it had
        those values before commit 3c610336a58f644525d5e4d2566c35eee6f7a618
        which says "the exact number isn't that interesting".
    
    12. testTdf153613_anchoredAfterPgBreak4 fails, but it now looks as in
        Word, so this is a bugfix.
    
    (13. Jenkins build on WNT (only) crashes in testForcepoint93 in sw
        layout code - strangely in the libreoffice-7-6 branch this happens
        on Linux too and it was fixed with commit
        191babee4f0ec643b80e96b0cd98c2d04ff96e4e)
    
    Change-Id: Ia1063693d96adff900ece943020a5bf69bdeb7a2
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/159471
    Tested-by: Jenkins
    Reviewed-by: Michael Stahl <michael.st...@allotropia.de>
    (cherry picked from commit 15b886f460919ea3dce425a621dc017c2992a96b)
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/159458
    Reviewed-by: Thorsten Behrens <thorsten.behr...@allotropia.de>

diff --git a/sw/qa/extras/rtfexport/data/page-break-emptyparas-spltpgpar.rtf 
b/sw/qa/extras/rtfexport/data/page-break-emptyparas-spltpgpar.rtf
new file mode 100644
index 000000000000..dfb1eeec0f4a
--- /dev/null
+++ b/sw/qa/extras/rtfexport/data/page-break-emptyparas-spltpgpar.rtf
@@ -0,0 +1,13 @@
+{\rtf1
+\spltpgpar
+\sectd
+\pard\plain
+BBBBBBBBBBBBBB
+\par
+\pard
+\page
+\par
+\pard
+CCCCCCCCCCCCCCCC
+\par
+}
diff --git a/sw/qa/extras/rtfexport/data/page-break-emptyparas.rtf 
b/sw/qa/extras/rtfexport/data/page-break-emptyparas.rtf
new file mode 100644
index 000000000000..a5f731aaf03b
--- /dev/null
+++ b/sw/qa/extras/rtfexport/data/page-break-emptyparas.rtf
@@ -0,0 +1,12 @@
+{\rtf1
+\sectd
+\pard\plain
+BBBBBBBBBBBBBB
+\par
+\pard
+\page
+\par
+\pard
+CCCCCCCCCCCCCCCC
+\par
+}
diff --git a/sw/qa/extras/rtfexport/rtfexport5.cxx 
b/sw/qa/extras/rtfexport/rtfexport5.cxx
index c8b9ac0dc206..61cb17741eb6 100644
--- a/sw/qa/extras/rtfexport/rtfexport5.cxx
+++ b/sw/qa/extras/rtfexport/rtfexport5.cxx
@@ -104,10 +104,10 @@ 
DECLARE_RTFEXPORT_TEST(testTdf153613_anchoredAfterPgBreak4, "tdf153613_anchoredA
     // An anchored TO character image (followed by nothing) anchors before the 
page break, no split.
     // This differs from #1 only in that it has a preceding character run 
before the page break.
     CPPUNIT_ASSERT_EQUAL(2, getPages());
-    CPPUNIT_ASSERT_MESSAGE("YOU FIXED ME!", 3 != getParagraphs());
+    CPPUNIT_ASSERT_EQUAL(3, getParagraphs());
 
     const auto& pLayout = parseLayoutDump();
-    assertXPath(pLayout, "//page[2]//anchored", 1); // DID YOU FIX ME? This 
should be page[1]
+    assertXPath(pLayout, "//page[1]//anchored", 1);
 }
 
 DECLARE_RTFEXPORT_TEST(testTdf153613_anchoredAfterPgBreak5, 
"tdf153613_anchoredAfterPgBreak5.rtf")
diff --git a/sw/qa/extras/rtfexport/rtfexport6.cxx 
b/sw/qa/extras/rtfexport/rtfexport6.cxx
index 13afa440cd2d..1d11581aba78 100644
--- a/sw/qa/extras/rtfexport/rtfexport6.cxx
+++ b/sw/qa/extras/rtfexport/rtfexport6.cxx
@@ -103,6 +103,9 @@ DECLARE_RTFEXPORT_TEST(testFdo49893_2, "fdo49893-2.rtf")
     CPPUNIT_ASSERT_EQUAL(OUString("HEADER"), 
parseDump("/root/page[1]/header/txt/text()"));
     CPPUNIT_ASSERT_EQUAL(OUString("HEADER"), 
parseDump("/root/page[2]/header/txt/text()"));
     CPPUNIT_ASSERT_EQUAL(OUString("HEADER"), 
parseDump("/root/page[3]/header/txt/text()"));
+    CPPUNIT_ASSERT_EQUAL(OUString("HEADER"), 
parseDump("/root/page[4]/header/txt/text()"));
+    CPPUNIT_ASSERT_EQUAL(OUString("HEADER"), 
parseDump("/root/page[5]/header/txt/text()"));
+    CPPUNIT_ASSERT_EQUAL(5, getPages()); // Word has 5
 }
 
 DECLARE_RTFEXPORT_TEST(testFdo89496, "fdo89496.rtf")
@@ -556,10 +559,10 @@ DECLARE_RTFEXPORT_TEST(testTdf133437, "tdf133437.rtf")
     assertXPath(pDump, 
"/root/page[1]/body/txt[1]/anchored/SwAnchoredDrawObject", 79);
 
     // Second page
-    assertXPath(pDump, 
"/root/page[2]/body/txt[2]/anchored/SwAnchoredDrawObject", 118);
+    assertXPath(pDump, 
"/root/page[2]/body/txt[2]/anchored/SwAnchoredDrawObject", 120);
 
     // Third page
-    assertXPath(pDump, 
"/root/page[3]/body/txt[2]/anchored/SwAnchoredDrawObject", 84);
+    assertXPath(pDump, 
"/root/page[3]/body/txt[2]/anchored/SwAnchoredDrawObject", 86);
 }
 
 CPPUNIT_TEST_FIXTURE(Test, testTdf128320)
diff --git a/sw/qa/extras/rtfexport/rtfexport7.cxx 
b/sw/qa/extras/rtfexport/rtfexport7.cxx
index 4d1550af4fdd..8abc76ff35a0 100644
--- a/sw/qa/extras/rtfexport/rtfexport7.cxx
+++ b/sw/qa/extras/rtfexport/rtfexport7.cxx
@@ -12,6 +12,7 @@
 #include <com/sun/star/document/XDocumentPropertiesSupplier.hpp>
 #include <com/sun/star/drawing/FillStyle.hpp>
 #include <com/sun/star/drawing/PointSequenceSequence.hpp>
+#include <com/sun/star/style/BreakType.hpp>
 #include <com/sun/star/style/PageStyleLayout.hpp>
 #include <com/sun/star/text/FontEmphasis.hpp>
 #include <com/sun/star/text/TableColumnSeparator.hpp>
@@ -660,6 +661,30 @@ DECLARE_RTFEXPORT_TEST(testWatermark, "watermark.rtf")
     CPPUNIT_ASSERT_EQUAL(float(66), nFontSize);
 }
 
+DECLARE_RTFEXPORT_TEST(testTdf153194Compat, "page-break-emptyparas.rtf")
+{
+    CPPUNIT_ASSERT_EQUAL(2, getPages());
+    // no \spltpgpar => paragraph 2 on page 1
+    CPPUNIT_ASSERT_EQUAL(style::BreakType_NONE,
+                         getProperty<style::BreakType>(getParagraph(1), 
"BreakType"));
+    CPPUNIT_ASSERT_EQUAL(style::BreakType_PAGE_BEFORE,
+                         getProperty<style::BreakType>(getParagraph(2), 
"BreakType"));
+    CPPUNIT_ASSERT_EQUAL(style::BreakType_NONE,
+                         getProperty<style::BreakType>(getParagraph(3), 
"BreakType"));
+}
+
+DECLARE_RTFEXPORT_TEST(testTdf153194New, "page-break-emptyparas-spltpgpar.rtf")
+{
+    CPPUNIT_ASSERT_EQUAL(2, getPages());
+    // \spltpgpar => paragraph 2 on page 2
+    CPPUNIT_ASSERT_EQUAL(style::BreakType_NONE,
+                         getProperty<style::BreakType>(getParagraph(1), 
"BreakType"));
+    CPPUNIT_ASSERT_EQUAL(style::BreakType_NONE,
+                         getProperty<style::BreakType>(getParagraph(2), 
"BreakType"));
+    CPPUNIT_ASSERT_EQUAL(style::BreakType_PAGE_BEFORE,
+                         getProperty<style::BreakType>(getParagraph(3), 
"BreakType"));
+}
+
 DECLARE_RTFEXPORT_TEST(testTdf153178, "tdf153178.rtf")
 {
     // the problem was that a frame was created
diff --git a/sw/qa/extras/rtfimport/rtfimport.cxx 
b/sw/qa/extras/rtfimport/rtfimport.cxx
index 7dd9aa43877f..be75cee76577 100644
--- a/sw/qa/extras/rtfimport/rtfimport.cxx
+++ b/sw/qa/extras/rtfimport/rtfimport.cxx
@@ -868,6 +868,8 @@ CPPUNIT_TEST_FIXTURE(Test, testContSectionPageBreak)
                              ->getPropertyValue("PageDescName"));
     // actually not sure how many paragraph there should be between
     // SECOND and THIRD - important is that the page break is on there
+    // (could be either 1 or 2; in Word it's a 2-line paragraph with the 1st
+    // line containing only the page break being ~0 height)
     uno::Reference<text::XTextRange> xParaNext = getParagraph(3);
     CPPUNIT_ASSERT_EQUAL(OUString(), xParaNext->getString());
     //If PageDescName is not empty, a page break / switch to page style is 
defined
@@ -1520,8 +1522,7 @@ CPPUNIT_TEST_FIXTURE(Test, testTdf108947)
     uno::Reference<text::XText> xHeaderTextLeft = 
getProperty<uno::Reference<text::XText>>(
         getStyles("PageStyles")->getByName("Default Page Style"), 
"HeaderTextLeft");
     aActual = xHeaderTextLeft->getString();
-    CPPUNIT_ASSERT_EQUAL(OUString(SAL_NEWLINE_STRING SAL_NEWLINE_STRING 
"Header Page 2 ?"),
-                         aActual);
+    CPPUNIT_ASSERT_EQUAL(OUString(SAL_NEWLINE_STRING "Header Page 2 ?"), 
aActual);
 #endif
 }
 
diff --git a/writerfilter/source/dmapper/DomainMapper.cxx 
b/writerfilter/source/dmapper/DomainMapper.cxx
index adf956653071..7a9379698a59 100644
--- a/writerfilter/source/dmapper/DomainMapper.cxx
+++ b/writerfilter/source/dmapper/DomainMapper.cxx
@@ -4336,6 +4336,7 @@ void DomainMapper::lcl_utext(const sal_uInt8 * data_, 
size_t len)
             {
                 if (m_pImpl->isBreakDeferred(PAGE_BREAK))
                 {
+                    assert(pContext); // can't have deferred break without
                     if 
(m_pImpl->GetSettingsTable()->GetSplitPgBreakAndParaMark())
                     {
                         if ( m_pImpl->GetIsFirstParagraphInSection() || 
!m_pImpl->IsFirstRun() )
@@ -4358,6 +4359,7 @@ void DomainMapper::lcl_utext(const sal_uInt8 * data_, 
size_t len)
                 }
                 else if (m_pImpl->isBreakDeferred(COLUMN_BREAK))
                 {
+                    assert(pContext); // can't have deferred break without
                     if ( m_pImpl->GetIsFirstParagraphInSection() || 
!m_pImpl->IsFirstRun() )
                     {
                         mbIsSplitPara = true;
@@ -4382,7 +4384,7 @@ void DomainMapper::lcl_utext(const sal_uInt8 * data_, 
size_t len)
             // no runs, we should not create a paragraph for it in Writer, 
unless that would remove the whole section.
             // Also do not remove here column breaks: they are treated in a 
different way and place.
             bool bIsColumnBreak = false;
-            if (pContext->isSet(PROP_BREAK_TYPE))
+            if (pContext && pContext->isSet(PROP_BREAK_TYPE))
             {
                 const uno::Any aBreakType = 
pContext->getProperty(PROP_BREAK_TYPE)->second;
                 bIsColumnBreak =
diff --git a/writerfilter/source/rtftok/rtfdispatchflag.cxx 
b/writerfilter/source/rtftok/rtfdispatchflag.cxx
index 699698aa8df8..23d8f5d1f59f 100644
--- a/writerfilter/source/rtftok/rtfdispatchflag.cxx
+++ b/writerfilter/source/rtftok/rtfdispatchflag.cxx
@@ -1364,6 +1364,13 @@ RTFError RTFDocumentImpl::dispatchFlag(RTFKeyword 
nKeyword)
                                       new RTFValue(0));
         }
         break;
+        case RTFKeyword::SPLTPGPAR:
+        {
+            // if flag is present, it is turned *off* - opposite to what spec 
says
+            
m_aSettingsTableSprms.set(NS_ooxml::LN_CT_Compat_splitPgBreakAndParaMark,
+                                      new RTFValue(0));
+        }
+        break;
         default:
         {
             SAL_INFO("writerfilter", "TODO handle flag '" << 
keywordToString(nKeyword) << "'");
diff --git a/writerfilter/source/rtftok/rtfdispatchsymbol.cxx 
b/writerfilter/source/rtftok/rtfdispatchsymbol.cxx
index 9aa9a2ce4a2e..23a97fc68524 100644
--- a/writerfilter/source/rtftok/rtfdispatchsymbol.cxx
+++ b/writerfilter/source/rtftok/rtfdispatchsymbol.cxx
@@ -136,17 +136,23 @@ RTFError RTFDocumentImpl::dispatchSymbol(RTFKeyword 
nKeyword)
 
             m_bHadSect = true;
             if (m_bIgnoreNextContSectBreak)
+            {
+                // testContSectionPageBreak: need \par now
+                dispatchSymbol(RTFKeyword::PAR);
                 m_bIgnoreNextContSectBreak = false;
+            }
             else
             {
                 sectBreak();
                 if (m_nResetBreakOnSectBreak != RTFKeyword::invalid)
                 {
                     // this should run on _second_ \sect after \page
-                    dispatchSymbol(m_nResetBreakOnSectBreak); // lazy reset
+                    dispatchFlag(m_nResetBreakOnSectBreak); // lazy reset
                     m_nResetBreakOnSectBreak = RTFKeyword::invalid;
                     m_bNeedSect = false; // dispatchSymbol set it
                 }
+                setNeedPar(true); // testFdo52052: need \par at end of document
+                // testNestedTable: but not m_bNeedCr, that creates a page 
break
             }
         }
         break;
@@ -396,20 +402,15 @@ RTFError RTFDocumentImpl::dispatchSymbol(RTFKeyword 
nKeyword)
                     // Only send the paragraph properties early if we'll 
create a new paragraph in a
                     // bit anyway.
                     checkNeedPap();
+                    // flush previously deferred break - needed for 
testFdo49893_2
+                    // which has consecutive \page with no text between
+                    sal_uInt8 const nothing[] = { 0 /*MSVC doesn't allow it to 
be empty*/ };
+                    Mapper().utext(nothing, 0);
                 }
                 sal_uInt8 const sBreak[] = { 0xc };
                 Mapper().text(sBreak, 1);
-                if (bFirstRun || m_bNeedCr)
-                {
-                    // If we don't have content in the document yet (so the 
break-before can't move
-                    // to a second layout page) or we already have characters 
sent (so the paragraph
-                    // properties are already finalized), then continue 
inserting a fake paragraph.
-                    if (!m_bNeedPap)
-                    {
-                        parBreak();
-                        m_bNeedPap = true;
-                    }
-                }
+                // testFdo81892 don't do another \par break directly; because 
of
+                // GetSplitPgBreakAndParaMark() it does finishParagraph *twice*
                 m_bNeedCr = true;
             }
         }
diff --git a/writerfilter/source/rtftok/rtfdocumentimpl.cxx 
b/writerfilter/source/rtftok/rtfdocumentimpl.cxx
index 45f49553b26b..b4ef4079dbf4 100644
--- a/writerfilter/source/rtftok/rtfdocumentimpl.cxx
+++ b/writerfilter/source/rtftok/rtfdocumentimpl.cxx
@@ -346,6 +346,9 @@ 
RTFDocumentImpl::RTFDocumentImpl(uno::Reference<uno::XComponentContext> const& x
 
     m_pTokenizer = new RTFTokenizer(*this, m_pInStream.get(), 
m_xStatusIndicator);
     m_pSdrImport = new RTFSdrImport(*this, m_xDstDoc);
+
+    // unlike OOXML, this is enabled by default
+    m_aSettingsTableSprms.set(NS_ooxml::LN_CT_Compat_splitPgBreakAndParaMark, 
new RTFValue(1));
 }
 
 RTFDocumentImpl::~RTFDocumentImpl() = default;
@@ -396,7 +399,7 @@ void RTFDocumentImpl::resolveSubstream(std::size_t nPos, Id 
nId, OUString const&
 void RTFDocumentImpl::outputSettingsTable()
 {
     // tdf#136740: do not change target document settings when pasting
-    if (!m_bIsNewDoc)
+    if (!m_bIsNewDoc || isSubstream())
         return;
     writerfilter::Reference<Properties>::Pointer_t pProp
         = new RTFReferenceProperties(m_aSettingsTableAttributes, 
m_aSettingsTableSprms);
@@ -644,8 +647,8 @@ void RTFDocumentImpl::runProps()
 
 void RTFDocumentImpl::runBreak()
 {
-    sal_uInt8 const sBreak[] = { 0xd };
-    Mapper().text(sBreak, 1);
+    sal_Unicode const sBreak[] = { 0x0d };
+    Mapper().utext(reinterpret_cast<sal_uInt8 const*>(sBreak), 1);
     m_bNeedCr = false;
 }
 
@@ -3657,6 +3660,13 @@ RTFError RTFDocumentImpl::popState()
             dispatchSymbol(RTFKeyword::PAR);
         if (m_bNeedSect) // may be set by dispatchSymbol above!
             sectBreak(true);
+        if (m_bNeedPar && !m_pSuperstream)
+        {
+            assert(!m_bNeedSect);
+            dispatchSymbol(RTFKeyword::PAR);
+            m_bNeedSect = false; // reset - m_bNeedPar was set for \sect at
+                // end of doc so don't need another one
+        }
     }
 
     m_aStates.pop();

Reply via email to