sw/qa/extras/htmlexport/htmlexport.cxx | 41 ++++++++++++++++++++++++++++++++ sw/source/filter/html/htmlatr.cxx | 12 +++++++-- sw/source/filter/html/htmlnumwriter.cxx | 4 ++- sw/source/filter/html/wrthtml.hxx | 3 +- 4 files changed, 56 insertions(+), 4 deletions(-)
New commits: commit b2bee5a4db5552c4d408800908ca717b4ea2564a Author: Miklos Vajna <vmik...@collabora.com> AuthorDate: Thu Jun 16 15:55:23 2022 +0200 Commit: Miklos Vajna <vmik...@collabora.com> CommitDate: Thu Jun 16 17:07:03 2022 +0200 sw HTML export, XHTML mode: fix lost <li> with a list header + item There is a general mismatch between XHTML and Writer lists: XHTML can only contain list items (for ordered or unordered lists), while Writer can contain list headers and list items. List headers have no bullet or number at the start, list items are the normal text nodes. Commit 8c2607ae3ce143586e623532b8ae5288277ec3ac (sw HTML export, XHTML mode: fix lost </li> when last list item is not numbered, 2022-02-21) fixed the list item end side of this problem: if all text nodes in a list are headers, then don't write ul/ol at all, otherwise end list headers with </li> as well to make sure the output XML is valid. However, this created a mismatch: the starting <li> for list headers in a list which also has non-header text nodes was not adapted. Fix the problem by extending OutHTML_SwFormat() so list headers in a list with non-header text nodes always have a <li> and </li>, and this condition is the same on the start/end side. Calculating if at least one text node is non-header in a list may not be cheap, so reuse the already calculated info from OutHTML_NumberBulletListStart() in OutHTML_SwFormat(). 
Change-Id: I3817a489f16166fc5b4c33ee64e2283c41a4402c Reviewed-on: https://gerrit.libreoffice.org/c/core/+/135999 Reviewed-by: Miklos Vajna <vmik...@collabora.com> Tested-by: Jenkins diff --git a/sw/qa/extras/htmlexport/htmlexport.cxx b/sw/qa/extras/htmlexport/htmlexport.cxx index bc0af92e93c0..44654bdbcf5a 100644 --- a/sw/qa/extras/htmlexport/htmlexport.cxx +++ b/sw/qa/extras/htmlexport/htmlexport.cxx @@ -1530,6 +1530,47 @@ CPPUNIT_TEST_FIXTURE(SwHtmlDomExportTest, testPartiallyNumberedList) "/reqif-xhtml:html/reqif-xhtml:div/reqif-xhtml:ol/reqif-xhtml:li/reqif-xhtml:p", 2); } +CPPUNIT_TEST_FIXTURE(SwHtmlDomExportTest, testListHeaderAndItem) +{ + // Given a document with a list, first para is not numbered, but the second is: + loadURL("private:factory/swriter", nullptr); + SwXTextDocument* pTextDoc = dynamic_cast<SwXTextDocument*>(mxComponent.get()); + SwWrtShell* pWrtShell = pTextDoc->GetDocShell()->GetWrtShell(); + pWrtShell->Insert("not numbered"); + SwDoc* pDoc = pWrtShell->GetDoc(); + sal_uInt16 nPos = pDoc->MakeNumRule(pDoc->GetUniqueNumRuleName()); + SwNumRule* pNumRule = pDoc->GetNumRuleTable()[nPos]; + { + SwNode& rNode = pWrtShell->GetCursor()->GetPoint()->nNode.GetNode(); + SwTextNode& rTextNode = *rNode.GetTextNode(); + rTextNode.SetAttr(SwNumRuleItem(pNumRule->GetName())); + rTextNode.SetCountedInList(false); + } + pWrtShell->SplitNode(); + pWrtShell->Insert2("numbered"); + { + SwNode& rNode = pWrtShell->GetCursor()->GetPoint()->nNode.GetNode(); + SwTextNode& rTextNode = *rNode.GetTextNode(); + rTextNode.SetAttr(SwNumRuleItem(pNumRule->GetName())); + } + + // When exporting to ReqIF: + ExportToReqif(); + + // Then make sure the output is well-formed xhtml: + SvMemoryStream aStream; + HtmlExportTest::wrapFragment(maTempFile, aStream); + xmlDocUniquePtr pXmlDoc = parseXmlStream(&aStream); + // Without the accompanying fix in place, this test would have failed: + // Entity: line 3: parser error : Opening and ending tag mismatch: ol line 3 and li + // 
<reqif-xhtml:ol><reqif-xhtml:p>not numbered</reqif-xhtml:p></reqif-xhtml:li> + CPPUNIT_ASSERT(pXmlDoc); + // Make sure that in case the list has a header and an item, then both are wrapped in an <li> + // element. + assertXPath(pXmlDoc, + "/reqif-xhtml:html/reqif-xhtml:div/reqif-xhtml:ol/reqif-xhtml:li/reqif-xhtml:p", 2); +} + CPPUNIT_TEST_FIXTURE(SwHtmlDomExportTest, testBlockQuoteNoMargin) { // Given a document with some text, para style set to Quotations, no bottom margin: diff --git a/sw/source/filter/html/htmlatr.cxx b/sw/source/filter/html/htmlatr.cxx index 1f1254c19bb3..db67572c545c 100644 --- a/sw/source/filter/html/htmlatr.cxx +++ b/sw/source/filter/html/htmlatr.cxx @@ -681,11 +681,12 @@ static void OutHTML_SwFormat( Writer& rWrt, const SwFormat& rFormat, if( nNewDefListLvl != rHWrt.m_nDefListLvl ) rHWrt.OutAndSetDefList( nNewDefListLvl ); + bool bAtLeastOneNumbered = false; // if necessary, start a bulleted or numbered list if( rInfo.bInNumberBulletList ) { OSL_ENSURE( !rHWrt.m_nDefListLvl, "DL cannot be inside OL!" ); - OutHTML_NumberBulletListStart( rHWrt, aNumInfo ); + OutHTML_NumberBulletListStart( rHWrt, aNumInfo, bAtLeastOneNumbered ); if( bNumbered ) { @@ -754,7 +755,14 @@ static void OutHTML_SwFormat( Writer& rWrt, const SwFormat& rFormat, bool bXhtmlBlockQuote = rHWrt.mbXHTML && rInfo.aToken == OOO_STRING_SVTOOLS_HTML_blockquote; // if necessary, start a new list item - if( rInfo.bInNumberBulletList && bNumbered ) + bool bNumberedForListItem = bNumbered; + if (!bNumberedForListItem && rHWrt.mbXHTML && bAtLeastOneNumbered) + { + // OutHTML_NumberBulletListEnd() will end a list item if at least one text node is numbered + // in the list, so open the list item with the same condition here. 
+ bNumberedForListItem = true; + } + if( rInfo.bInNumberBulletList && bNumberedForListItem ) { HtmlWriter html(rWrt.Strm(), rHWrt.maNamespace); html.start(OOO_STRING_SVTOOLS_HTML_li); diff --git a/sw/source/filter/html/htmlnumwriter.cxx b/sw/source/filter/html/htmlnumwriter.cxx index df15619cd85e..f41ac73929f9 100644 --- a/sw/source/filter/html/htmlnumwriter.cxx +++ b/sw/source/filter/html/htmlnumwriter.cxx @@ -84,7 +84,8 @@ void SwHTMLWriter::SetNextNumInfo( std::unique_ptr<SwHTMLNumRuleInfo> pNxt ) } Writer& OutHTML_NumberBulletListStart( SwHTMLWriter& rWrt, - const SwHTMLNumRuleInfo& rInfo ) + const SwHTMLNumRuleInfo& rInfo, + bool& rAtLeastOneNumbered ) { SwHTMLNumRuleInfo& rPrevInfo = rWrt.GetNumInfo(); bool bSameRule = rPrevInfo.GetNumRule() == rInfo.GetNumRule(); @@ -124,6 +125,7 @@ Writer& OutHTML_NumberBulletListStart( SwHTMLWriter& rWrt, ++nPos; } + rAtLeastOneNumbered = bAtLeastOneNumbered; if (!bAtLeastOneNumbered) { return rWrt; diff --git a/sw/source/filter/html/wrthtml.hxx b/sw/source/filter/html/wrthtml.hxx index 866442a34928..5db3c3c2849a 100644 --- a/sw/source/filter/html/wrthtml.hxx +++ b/sw/source/filter/html/wrthtml.hxx @@ -722,7 +722,8 @@ Writer& OutCSS1_NumberBulletListStyleOpt( Writer& rWrt, const SwNumRule& rNumRul sal_uInt8 nLevel ); Writer& OutHTML_NumberBulletListStart( SwHTMLWriter& rWrt, - const SwHTMLNumRuleInfo& rInfo ); + const SwHTMLNumRuleInfo& rInfo, + bool& rAtLeastOneNumbered ); Writer& OutHTML_NumberBulletListEnd( SwHTMLWriter& rWrt, const SwHTMLNumRuleInfo& rNextInfo );