sw/qa/extras/htmlexport/htmlexport.cxx | 36 +++++++++++++++++++++++++++++++++ sw/source/filter/html/htmlatr.cxx | 30 ++++++++++++++++++++++++++- sw/source/filter/html/wrthtml.cxx | 8 +++++++ sw/source/filter/html/wrthtml.hxx | 3 ++ 4 files changed, 76 insertions(+), 1 deletion(-)
New commits: commit 505f5db522f8406715f455d8007d014073a99097 Author: Miklos Vajna <vmik...@collabora.com> AuthorDate: Tue Feb 15 17:07:45 2022 +0100 Commit: Miklos Vajna <vmik...@collabora.com> CommitDate: Tue Feb 15 20:24:06 2022 +0100 sw HTML export: add a new LeadingTabWidth option This is a simple way to not lose indentation done with tabs (e.g. source code) during the HTML export. A more complex way would be to ask the layout for the tab portion width, ask VCL what's the size of an nbsp glyph and then act accordingly, which is not done here. Change-Id: I2a5c0512e9e5541e55e10f29952679bf05d63f1b Reviewed-on: https://gerrit.libreoffice.org/c/core/+/129974 Reviewed-by: Miklos Vajna <vmik...@collabora.com> Tested-by: Jenkins diff --git a/sw/qa/extras/htmlexport/htmlexport.cxx b/sw/qa/extras/htmlexport/htmlexport.cxx index fa40c8d7d04c..d83750951544 100644 --- a/sw/qa/extras/htmlexport/htmlexport.cxx +++ b/sw/qa/extras/htmlexport/htmlexport.cxx @@ -2077,6 +2077,42 @@ CPPUNIT_TEST_FIXTURE(SwHtmlDomExportTest, testTrailingLineBreak) CPPUNIT_ASSERT_EQUAL(OUString("test\n"), aActual); } +CPPUNIT_TEST_FIXTURE(SwHtmlDomExportTest, testLeadingTab) +{ + // Given a document with leading tabs: + SwDoc* pDoc = createSwDoc(); + SwWrtShell* pWrtShell = pDoc->GetDocShell()->GetWrtShell(); + pWrtShell->Insert("\t first"); + pWrtShell->SplitNode(); + pWrtShell->Insert("\t\t second"); + pWrtShell->SplitNode(); + pWrtShell->Insert("thi \t rd"); + + // When exporting to HTML, using LeadingTabWidth=2: + uno::Reference<frame::XStorable> xStorable(mxComponent, uno::UNO_QUERY); + uno::Sequence<beans::PropertyValue> aStoreProperties = { + comphelper::makePropertyValue("FilterName", OUString("HTML (StarWriter)")), + comphelper::makePropertyValue("FilterOptions", OUString("xhtmlns=reqif-xhtml")), + comphelper::makePropertyValue("LeadingTabWidth", static_cast<sal_Int32>(2)), + }; + xStorable->storeToURL(maTempFile.GetURL(), aStoreProperties); + + // Then make sure that leading tabs are 
replaced with 2 nbsps: + SvMemoryStream aStream; + HtmlExportTest::wrapFragment(maTempFile, aStream); + xmlDocUniquePtr pXmlDoc = parseXmlStream(&aStream); + CPPUNIT_ASSERT(pDoc); + // Without the accompanying fix in place, this test would have failed with: + // - Expected: <nbsp><nbsp><space>first + // - Actual : <tab><space>first + // i.e. the leading tab was not replaced by 2 nbsps. + assertXPathContent(pXmlDoc, "//reqif-xhtml:p[1]", u"\xa0\xa0 first"); + // Test a leading tab that is not at the start of the paragraph: + assertXPathContent(pXmlDoc, "//reqif-xhtml:p[2]", u"\xa0\xa0\xa0\xa0 second"); + // Test a tab which is not leading: + assertXPathContent(pXmlDoc, "//reqif-xhtml:p[3]", u"thi \t rd"); +} + CPPUNIT_PLUGIN_IMPLEMENT(); /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sw/source/filter/html/htmlatr.cxx b/sw/source/filter/html/htmlatr.cxx index a78c5e272d56..43e2e3fadb78 100644 --- a/sw/source/filter/html/htmlatr.cxx +++ b/sw/source/filter/html/htmlatr.cxx @@ -2357,6 +2357,8 @@ Writer& OutHTML_SwTextNode( Writer& rWrt, const SwContentNode& rNode ) { HTMLOutContext aContext( rHTMLWrt.m_eDestEnc ); + // Tabs are leading till there is a non-tab since the start of the paragraph. + bool bLeadingTab = true; for( ; nStrPos < nEnd; nStrPos++ ) { // output the frames that are anchored to the current position @@ -2491,7 +2493,33 @@ Writer& OutHTML_SwTextNode( Writer& rWrt, const SwContentNode& rNode ) rHTMLWrt.OutPointFieldmarks(aMarkPos); } else - HTMLOutFuncs::Out_Char( rWrt.Strm(), c, aContext, &rHTMLWrt.m_aNonConvertableCharacters ); + { + bool bConsumed = false; + if (c == '\t') + { + if (bLeadingTab && rHTMLWrt.m_nLeadingTabWidth.has_value()) + { + // Consume a tab if it's leading and we know the number of NBSPs to + // be used as a replacement. + for (sal_Int32 i = 0; i < *rHTMLWrt.m_nLeadingTabWidth; ++i) + { + rWrt.Strm().WriteCharPtr(" "); + } + bConsumed = true; + } + } + else + { + // Not a tab -> later tabs are no longer leading. 
+ bLeadingTab = false; + } + + if (!bConsumed) + { + HTMLOutFuncs::Out_Char(rWrt.Strm(), c, aContext, + &rHTMLWrt.m_aNonConvertableCharacters); + } + } if (!rHTMLWrt.mbReqIF) { diff --git a/sw/source/filter/html/wrthtml.cxx b/sw/source/filter/html/wrthtml.cxx index af92e3d46330..2b0d8ca5ff7b 100644 --- a/sw/source/filter/html/wrthtml.cxx +++ b/sw/source/filter/html/wrthtml.cxx @@ -320,6 +320,14 @@ void SwHTMLWriter::SetupFilterFromPropertyValues( // XHTML namespace implies XHTML. mbXHTML = true; } + + it = aStoreMap.find("LeadingTabWidth"); + if (it != aStoreMap.end()) + { + sal_Int32 nVal{}; + it->second >>= nVal; + m_nLeadingTabWidth.emplace(nVal); + } } ErrCode SwHTMLWriter::WriteStream() diff --git a/sw/source/filter/html/wrthtml.hxx b/sw/source/filter/html/wrthtml.hxx index c5cd5dbe0780..c407ba7da6a8 100644 --- a/sw/source/filter/html/wrthtml.hxx +++ b/sw/source/filter/html/wrthtml.hxx @@ -417,6 +417,9 @@ public: /// DPI used when exporting a vector shape as a bitmap. std::optional<sal_Int32> m_nShapeDPI; + /// If set, replace leading tabs with this many non-breaking spaces. + std::optional<sal_Int32> m_nLeadingTabWidth; + /// Construct an instance of SwHTMLWriter and optionally give it /// the filter options directly, which can also be set via SetupFilterOptions(). explicit SwHTMLWriter( const OUString& rBaseURL, const OUString& rFilterOptions = "" );