sw/qa/extras/rtfexport/data/fdo55504-1-min.rtf | 49 +++++++++++++++++++++++ sw/qa/extras/rtfexport/rtfexport2.cxx | 3 - sw/qa/extras/rtfexport/rtfexport8.cxx | 25 ++++++++++- sw/source/core/unocore/unotext.cxx | 3 + writerfilter/source/rtftok/rtfdispatchsymbol.cxx | 3 - writerfilter/source/rtftok/rtfdocumentimpl.cxx | 6 ++ writerfilter/source/rtftok/rtfsdrimport.hxx | 1 7 files changed, 85 insertions(+), 5 deletions(-)
New commits: commit 582ef812702413dbe7fb0f132bca3e3e4c2e1d40 Author: Michael Stahl <michael.st...@allotropia.de> AuthorDate: Fri Feb 9 17:51:03 2024 +0100 Commit: Michael Stahl <michael.st...@allotropia.de> CommitDate: Mon Feb 12 11:05:45 2024 +0100 tdf#158983 writerfilter: RTF import: fix page breaks and shape anchors Somehow, not sure why, the added import of \wrapdefault in commit 86c0f58b6f9f392865196606173d1b98a6897f32 caused the page break in fdo55504-1.rtf to get lost. The first problem is that there is a \sbknone before the first \sect - this should not have any effect before \sect because \sbk* affect the *previous* section break, but it's not an option to simply ignore it (even if it works for this bugdoc) because it may be that there is no \sectd after \sect and then it will have an effect for the later section. The problem was in handling \page: here the premature \sbknone caused a sectBreak() which ate the page break; ignore it here by checking m_bHadSect. The second problem then was that now all but the first shape were anchored on page 2. This was because RTFDocumentImpl::beforePopState() for \shape of the 1st shape called parBreak() and that set the bIsFirstParaInSection flag. This flag prevented DomainMapper::lcl_utext() in the "if (m_pImpl->isBreakDeferred(PAGE_BREAK)) if (GetSplitPgBreakAndParaMark())" branch from inserting another paragraph break that is necessary to preserve the already inserted shapes anchored to the 2nd paragraph on page 1. (This is how it works for the equivalent DOCX document, with settings.xml edited to add w:splitPgBreakAndParaMark and remove "compatibilityMode" etc. because Word 2013 doesn't set these correctly.) The consequence is that when the second SwTextNode is converted to a text frame, all the shape anchors move to the next paragraph, the one with the RES_BREAK on it. Fix this by limiting the parBreak() handling in RTFDocumentImpl::beforePopState() to when the shape is a SwGrfNode, which is the scenario in the commit 0d9132c5046e15540abc20e45d64080708626441 "fdo#47036 fix RTF import of shapes inside text frames at the start of the doc" - the testFdo47036 fails if the block is removed completely. This caused 2 test failures, but both cases look the same as in Word 2013 now: Test name: (anonymous namespace)::testTdf158826_extraCR::Load_Verify_Reload_Verify An uncaught exception of type com.sun.star.uno.RuntimeException - unsatisfied query for interface of type com.sun.star.text.XTextTable! rtfexport2.cxx:537:Assertion Test name: (anonymous namespace)::testFdo47495::Load_Verify_Reload_Verify equality assertion failed - Expected: 2 - Actual : 1 Change-Id: I43fa9431721650a6d748d1f4bda9aeaa7a9c6b45 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/163200 Tested-by: Jenkins Reviewed-by: Michael Stahl <michael.st...@allotropia.de> diff --git a/sw/qa/extras/rtfexport/data/fdo55504-1-min.rtf b/sw/qa/extras/rtfexport/data/fdo55504-1-min.rtf new file mode 100644 index 000000000000..6e7667629969 --- /dev/null +++ b/sw/qa/extras/rtfexport/data/fdo55504-1-min.rtf @@ -0,0 +1,49 @@ +{ tf1deflang1025nsinsicpg1251\uc1deff0\deff0\stshfdbch0\stshfloch0\stshfhich0\stshfbi0\deflang1049\deflangfe1049{ onttbl{0romancharset204prq2{\*\panose 02020603050405020304}Times New Roman;}{1swisscharset204prq2{\*\panose 020b0604020202020204}Arial;}{39 romancharset0prq2 Times New Roman;} +{37romancharset238prq2 Times New Roman CE;}{40romancharset161prq2 Times New Roman Greek;}{41romancharset162prq2 Times New Roman Tur;}{42 bidi romancharset177prq2 Times New Roman (Hebrew);} +{43bidi romancharset178prq2 Times New Roman (Arabic);}{44roman charset186prq2 Times New Roman Baltic;}{45romancharset163prq2 Times New Roman (Vietnamese);}{49swisscharset0prq2 Arial;} +{47swisscharset238prq2 Arial CE;}{50swisscharset161prq2 Arial Greek;}{ 51swisscharset162prq2 Arial Tur;}{52bidi swisscharset177prq2 Arial (Hebrew);}{53bidi swisscharset178prq2 Arial (Arabic);} +{54swisscharset186prq2 Arial Baltic;}{55swisscharset163prq2 Arial (Vietnamese);}}{+ ed255\green255lue0; ed255\green255lue255; ed0\green0lue128; ed0\green128lue128; ed0\green128lue0; ed128\green0lue128; ed128\green0lue0; ed128\green128lue0; ed128\green128lue128; ed192\green192lue192;}{\stylesheet{ +\ql \li0 i0\widctlpar\wrapdefaultspalphaspnumaautodjustright in0\lin0\itap0 tlchcs1 f0fs24lang1025 \ltrchcs0 s24\lang1049\langfe1049+Default Paragraph Font;}{\* s11 srowd rftsWidthB3 rpaddl108 rpaddr108 rpaddfl3 rpaddft3 rpaddfb3 rpaddfr3 rcbpat1 rcfpat1 blind0 blindtype3 scellwidthfts0 svertalt sbrdrt sbrdrl sbrdrb sbrdrr sbrdrdgl sbrdrdgr sbrdrh sbrdrv +\ql \li0 i0\widctlpar\wrapdefaultspalphaspnumaautodjustright in0\lin0\itap0 tlchcs1 f0fs20 \ltrchcs0 s20\lang1024\langfe1024+{\*\latentstyles\lsdstimax156\lsdlockeddef0}{\* evtbl {Unknown;}}{\* sidtbl sid3757467}{\*\generator Microsoft Word 11.0.0000;}{\info{ itle \'d1\'cf\'d0\'c0\'c2\'ca\'c0 \'b9 6785}{uthor Crystal Reports}{\doccomm Powered By Crystal} +{\operator \'cf\'f0\'ee\'e3\'f0\'e0\'ec\'ec\'e8\'f1\'f2}{+{ ern24611}{\*\password 00000000}}{\*\xmlnstbl {\xmlns1 http://schemas.microsoft.com/office/word/2003/wordml}}\paperw16836\paperh11904\margl567\margr397\margt567\margb284\gutter0\ltrsect +\widowctrltnbjenddoc\donotembedsysfont0\donotembedlingdata1\grfdocevents0 alidatexml0\showplaceholdtext0\ignoremixedcontent0\saveinvalidxml0\showxmlerrors0\horzdoc\dghspace120\dgvspace120\dghorigin1701\dgvorigin1984\dghshow0\dgvshow3 +\jcompressiewkind1iewscale100 sidroot3757467 et0{\*\wgrffmtfilter 013f}\ilfomacatclnup0\ltrpar \sectd \ltrsect\lndscpsxn\sbknone\linex0\sectdefaultcl\sftnbj {\*\pnseclvl1\pnucrm\pnstart1\pnindent720\pnhang {\pntxta .}}{\*\pnseclvl2 +\pnucltr\pnstart1\pnindent720\pnhang {\pntxta .}}{\*\pnseclvl3\pndec\pnstart1\pnindent720\pnhang {\pntxta .}}{\*\pnseclvl4\pnlcltr\pnstart1\pnindent720\pnhang {\pntxta )}}{\*\pnseclvl5\pndec\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}{\*\pnseclvl6 +\pnlcltr\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}{\*\pnseclvl7\pnlcrm\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}{\*\pnseclvl8\pnlcltr\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}{\*\pnseclvl9\pnlcrm\pnstart1\pnindent720\pnhang +{\pntxtb (}{\pntxta )}}\pard\plain \ltrpar\qc \li0 i0 owidctlpar + x360 x720 x1080 x1440 x1800 x2160 x2520 x2880 x3240 x3600 x3960 x4320 x4680 x5040 x5400 x5760 x6120 x6480 x6840 x7200 x7560 x7920 x8280 x8640 x9000 x9360 x9720 x10080 x10440 x10800 x11160 x11520 x11880 + x12240 x12600\pvpg\phpg\posx2007\posy597bsh-900bsw12870\wrapdefault aauto in0\lin0\itap0 tlchcs1 f0fs24lang1025 \ltrchcs0 s24\lang1049\langfe1049+\lang1024\langfe1024 oproof\insrsid3757467 {\shp{\*\shpinst\shpleft6567\shptop3832\shpright8713\shpbottom3832\shpfhdr0\shpbxpage\shpbxignore\shpbypage\shpbyignore\shpwr3\shpwrk0\shpfblwtxt1\shpz0\shplid1026 +{\sp{\sn shapeType}{\sv 20}}{\sp{\sn fFlipH}{\sv 0}}{\sp{\sn fFlipV}{\sv 0}}{\sp{\sn lineWidth}{\sv 12700}}{\sp{\sn fLine}{\sv 1}}{\sp{\sn posrelh}{\sv 1}}{\sp{\sn posrelv}{\sv 1}}{\sp{\sn fLayoutInCell}{\sv 0}}{\sp{\sn fBehindDocument}{\sv 1}} +{\sp{\sn fLayoutInCell}{\sv 0}}}{\shprslt{\*\do\dobxpage\dobypage\dodhgt0\dpline\dpptx0\dppty0\dpptx2146\dppty0\dpx6567\dpy3832\dpxsize2146\dpysize0\dplinew20\dplinecor0\dplinecog0\dplinecob0}}} +{\shp{\*\shpinst\shpleft9912\shptop3862\shpright12073\shpbottom3862\shpfhdr0\shpbxpage\shpbxignore\shpbypage\shpbyignore\shpwr3\shpwrk0\shpfblwtxt1\shpz1\shplid1027{\sp{\sn shapeType}{\sv 20}}{\sp{\sn fFlipH}{\sv 0}}{\sp{\sn fFlipV}{\sv 0}} +{\sp{\sn lineWidth}{\sv 12700}}{\sp{\sn fLine}{\sv 1}}{\sp{\sn posrelh}{\sv 1}}{\sp{\sn posrelv}{\sv 1}}{\sp{\sn fLayoutInCell}{\sv 0}}{\sp{\sn fBehindDocument}{\sv 1}}{\sp{\sn fLayoutInCell}{\sv 0}}}{\shprslt{\*\do\dobxpage\dobypage\dodhgt1 +\dpline\dpptx0\dppty0\dpptx2161\dppty0\dpx9912\dpy3862\dpxsize2161\dpysize0\dplinew20\dplinecor0\dplinecog0\dplinecob0}}} +} +{ tlchcs1 f1 \ltrchcs0 1s17+1\insrsid3757467 +\par }{ tlchcs1 f1 \ltrchcs0 1s17+}{ tlchcs1 f1 \ltrchcs0 1\insrsid3757467 +\par }{ tlchcs1 f1 \ltrchcs0 1s17+\ltrchcs0 1\insrsid3757467 +\par }{ tlchcs1 f1 \ltrchcs0 1s17+\par } +\pard \ltrpar\ql \li0 i0 owidctlpar\wrapdefaultaauto in0\lin0\itap0 { tlchcs1 f1 \ltrchcs0 1\insrsid3757467 \page \sect }\sectd \ltrsect\lndscpsxn\sbknone\linex0\sectdefaultcl\sftnbj \pard\plain \ltrpar\ql \li0 i0 owidctlpar + x360 x720 x1080 x1440 x1800 x2160 x2520 x2880 x3240 x3600 x3960 x4320 x4680 x5040 x5400 x5760 x6120 x6480 x6840 x7200 x7560 x7920 x8280 x8640 x9000 x9360 x9720 x10080 x10440 x10800 x11160 x11520 x11880 + x12240\pvpg\phpg\posx612\posy627bsh-225bsw12480\wrapdefaultaauto in0\lin0\itap0 tlchcs1 f0fs24lang1025 \ltrchcs0 s24\lang1049\langfe1049+\lang1024\langfe1024 oproof\insrsid3757467 {\shp{\*\shpinst\shpleft567\shptop867\shpright16423\shpbottom867\shpfhdr0\shpbxpage\shpbxignore\shpbypage\shpbyignore\shpwr3\shpwrk0\shpfblwtxt1\shpz29\shplid1055 +{\sp{\sn shapeType}{\sv 20}}{\sp{\sn fFlipH}{\sv 0}}{\sp{\sn fFlipV}{\sv 0}}{\sp{\sn lineWidth}{\sv 12700}}{\sp{\sn fLine}{\sv 1}}{\sp{\sn posrelh}{\sv 1}}{\sp{\sn posrelv}{\sv 1}}{\sp{\sn fLayoutInCell}{\sv 0}}{\sp{\sn fBehindDocument}{\sv 1}} +{\sp{\sn fLayoutInCell}{\sv 0}}}{\shprslt{\*\do\dobxpage\dobypage\dodhgt29\dpline\dpptx0\dppty0\dpptx15856\dppty0\dpx567\dpy867\dpxsize15856\dpysize0\dplinew20\dplinecor0\dplinecog0\dplinecob0}}} +{\shp{\*\shpinst\shpleft567\shptop1167\shpright16439\shpbottom1167\shpfhdr0\shpbxpage\shpbxignore\shpbypage\shpbyignore\shpwr3\shpwrk0\shpfblwtxt1\shpz30\shplid1056{\sp{\sn shapeType}{\sv 20}}{\sp{\sn fFlipH}{\sv 0}}{\sp{\sn fFlipV}{\sv 0}} +{\sp{\sn lineWidth}{\sv 12700}}{\sp{\sn fLine}{\sv 1}}{\sp{\sn posrelh}{\sv 1}}{\sp{\sn posrelv}{\sv 1}}{\sp{\sn fLayoutInCell}{\sv 0}}{\sp{\sn fBehindDocument}{\sv 1}}{\sp{\sn fLayoutInCell}{\sv 0}}}{\shprslt{\*\do\dobxpage\dobypage\dodhgt30 +\dpline\dpptx0\dppty0\dpptx15872\dppty0\dpx567\dpy1167\dpxsize15872\dpysize0\dplinew20\dplinecor0\dplinecog0\dplinecob0}}} +}{ tlchcs1 f1 \ltrchcs0 1s17+\'eb\'fc\'ed\'ee\'e5 \'ee\'e1\'f0\'e0\'e7\'ee\'e2\'e0\'f2\'e5\'eb\'fc\'ed\'ee\'e5 \'f3\'f7\'f0\'e5\'e6\'e4\'e5\'ed\'e8\'e5 \'e4\'e5\'f2\'f1\'ea\'e8\'e9 \'f1\'e0\'e4 \'b99 "\'d0\'ee\'ec\'e0\'f8\'ea\'e0" \'ca\'f3\'f0\'f1\'ea\'ee\'e3\'ee \'ec\'f3\'ed\'e8\'f6 +\'e8\'ef\'e0\'eb\'fc\'ed\'ee\'e3\'ee \'f0\'e0\'e9\'ee\'ed\'e0 \'d1\'f2\'e0\'e2\'f0\'ee\'ef\'ee\'eb\'fc\'f1\'ea\'ee\'e3\'ee \'ea\'f0\'e0\'ff}{ tlchcs1 f1 \ltrchcs0 1\insrsid3757467 +\par } +\pard \ltrpar\ql \li0 i0 owidctlpar\wrapdefaultaauto in0\lin0\itap0 { tlchcs1 f1 \ltrchcs0 1\insrsid3757467 +\par }} diff --git a/sw/qa/extras/rtfexport/rtfexport2.cxx b/sw/qa/extras/rtfexport/rtfexport2.cxx index 4327a3911c5b..29f3ec3eb4b4 100644 --- a/sw/qa/extras/rtfexport/rtfexport2.cxx +++ b/sw/qa/extras/rtfexport/rtfexport2.cxx @@ -534,7 +534,8 @@ DECLARE_RTFEXPORT_TEST(testFdo48446, "fdo48446.rtf") DECLARE_RTFEXPORT_TEST(testFdo47495, "fdo47495.rtf") { // Used to have 4 paragraphs, as a result the original bugdoc had 2 pages instead of 1. - CPPUNIT_ASSERT_EQUAL(2, getParagraphs()); + // Word 2013 shows 1 paragraph + CPPUNIT_ASSERT_EQUAL(1, getParagraphs()); } DECLARE_RTFEXPORT_TEST(testAllGapsWord, "all_gaps_word.rtf") diff --git a/sw/qa/extras/rtfexport/rtfexport8.cxx b/sw/qa/extras/rtfexport/rtfexport8.cxx index 08ca8452f928..6773aba8d44e 100644 --- a/sw/qa/extras/rtfexport/rtfexport8.cxx +++ b/sw/qa/extras/rtfexport/rtfexport8.cxx @@ -109,6 +109,27 @@ DECLARE_RTFEXPORT_TEST(testTdf158586_lostFrame, "tdf158586_lostFrame.rtf") CPPUNIT_ASSERT_EQUAL(2, getPages()); } +DECLARE_RTFEXPORT_TEST(testTdf158983, "fdo55504-1-min.rtf") +{ + // the problem was that the page break was missing and the shapes were + // all anchored to the same node + + const auto& pLayout = parseLayoutDump(); + assertXPath(pLayout, "/root/page[1]/body/section/txt"_ostr, 1); + assertXPath(pLayout, "/root/page[1]/body/section/txt/anchored/fly"_ostr, 1); + // Word shows these shapes anchored in the fly, not body, but at least they are not lost + assertXPath(pLayout, "/root/page[1]/body/section/txt/anchored/SwAnchoredDrawObject"_ostr, 2); + // page break, paragraph break, section break. + assertXPath(pLayout, "/root/page[2]/body/section[1]/txt"_ostr, 1); + assertXPath(pLayout, "/root/page[2]/body/section[1]/txt/anchored"_ostr, 0); + assertXPath(pLayout, "/root/page[2]/body/section[2]/txt"_ostr, 1); + assertXPath(pLayout, "/root/page[2]/body/section[2]/txt/anchored/fly"_ostr, 1); + // Word shows these shapes anchored in the fly, not body, but at least they are not lost + assertXPath(pLayout, "/root/page[2]/body/section[2]/txt/anchored/SwAnchoredDrawObject"_ostr, 2); + + CPPUNIT_ASSERT_EQUAL(2, getPages()); +} + DECLARE_RTFEXPORT_TEST(testAnnotationPar, "tdf136445-1-min.rtf") { // the problem was that the paragraph break following annotation was missing @@ -130,8 +151,8 @@ DECLARE_RTFEXPORT_TEST(testTdf158826_extraCR, "tdf158826_extraCR.rtf") // The page break defined before the document content should not cause a page break CPPUNIT_ASSERT_EQUAL(1, getPages()); - // There is a two-column floating table [that SHOULD be getParagraphOrTable(1)] - uno::Reference<text::XTextTable> xTable(getParagraphOrTable(2), uno::UNO_QUERY_THROW); + // There is a two-column floating table + uno::Reference<text::XTextTable> xTable(getParagraphOrTable(1), uno::UNO_QUERY_THROW); } DECLARE_RTFEXPORT_TEST(testTdf158830, "tdf158830.rtf") diff --git a/sw/source/core/unocore/unotext.cxx b/sw/source/core/unocore/unotext.cxx index f959d6a6104c..93603d6d407d 100644 --- a/sw/source/core/unocore/unotext.cxx +++ b/sw/source/core/unocore/unotext.cxx @@ -1634,6 +1634,9 @@ SwXText::convertToTextFrame( // see testFlyInFly for why this checks only the edges of the selection, // and testFloatingTablesAnchor for why it excludes pre/post table // added nodes + // TODO: isGraphicNode here looks dubious; see also tdf#47036 fix; + // this needs more investigation when exactly Word considers something + // anchored in text frame vs. anchored in body. if (!isGraphicNode(pFrameFormat) && (IsAtParaMatch(*oAnchorCheckPam, rAnchor) || (RndStdIds::FLY_AT_CHAR == rAnchor.GetAnchorId() diff --git a/writerfilter/source/rtftok/rtfdispatchsymbol.cxx b/writerfilter/source/rtftok/rtfdispatchsymbol.cxx index 0f01d79f5cd4..62ec8bf97e58 100644 --- a/writerfilter/source/rtftok/rtfdispatchsymbol.cxx +++ b/writerfilter/source/rtftok/rtfdispatchsymbol.cxx @@ -371,7 +371,8 @@ RTFError RTFDocumentImpl::dispatchSymbol(RTFKeyword nKeyword) = m_aStates.top().getSectionSprms().find(NS_ooxml::LN_EG_SectPrContents_titlePg); if (((pBreak && pBreak->getInt() - == static_cast<sal_Int32>(NS_ooxml::LN_Value_ST_SectionMark_continuous)) + == static_cast<sal_Int32>(NS_ooxml::LN_Value_ST_SectionMark_continuous) + && m_bHadSect) // tdf#158983 before first \sect, ignore \sbknone! || m_nResetBreakOnSectBreak == RTFKeyword::SBKNONE) && !(pTitlePg && pTitlePg->getInt())) { diff --git a/writerfilter/source/rtftok/rtfdocumentimpl.cxx b/writerfilter/source/rtftok/rtfdocumentimpl.cxx index 188cec25b95a..19fe333e3e0c 100644 --- a/writerfilter/source/rtftok/rtfdocumentimpl.cxx +++ b/writerfilter/source/rtftok/rtfdocumentimpl.cxx @@ -2968,7 +2968,11 @@ RTFError RTFDocumentImpl::beforePopState(RTFParserState& rState) case Destination::SHAPE: m_bNeedFinalPar = true; m_bNeedCr = m_bNeedCrOrig; - if (rState.getFrame().hasProperties()) + // tdf#47036 insert paragraph break for graphic object inside text + // frame at start of document - TODO: the object may actually be + // anchored inside the text frame and this ends up putting the + // anchor in the body, but better than losing the shape... + if (rState.getFrame().hasProperties() && m_pSdrImport->isTextGraphicObject()) { // parBreak() modifies m_aStates.top() so we can't apply resetFrame() directly on aState resetFrame(); diff --git a/writerfilter/source/rtftok/rtfsdrimport.hxx b/writerfilter/source/rtftok/rtfsdrimport.hxx index 16f7f9c319ac..b06803bd0f64 100644 --- a/writerfilter/source/rtftok/rtfsdrimport.hxx +++ b/writerfilter/source/rtftok/rtfsdrimport.hxx @@ -77,6 +77,7 @@ public: void popParent(); css::uno::Reference<css::drawing::XShape> const& getCurrentShape() const { return m_xShape; } bool isFakePict() const { return m_bFakePict; } + bool isTextGraphicObject() const { return m_bTextGraphicObject; } private: void createShape(const OUString& rService, css::uno::Reference<css::drawing::XShape>& xShape,