sw/CppunitTest_sw_filter_html.mk             |   75 +++++++++++++++++++++++++++
 sw/Module_sw.mk                              |    1 
 sw/qa/filter/html/data/empty-paragraph.xhtml |    3 +
 sw/qa/filter/html/html.cxx                   |   54 +++++++++++++++++++
 sw/source/filter/html/swhtml.cxx             |    5 +
 5 files changed, 136 insertions(+), 2 deletions(-)

New commits:
commit ec9c2b473c61041ffa0cb09f7c6f01d400d75ac9
Author:     Miklos Vajna <vmik...@collabora.com>
AuthorDate: Mon Sep 5 16:15:21 2022 +0200
Commit:     Xisco Fauli <xiscofa...@libreoffice.org>
CommitDate: Tue Sep 6 12:25:29 2022 +0200

    sw XHTML import: fix lost empty paragraphs
    
    Plain HTML import ignores empty paragraphs, because browsers ignore such
    paragraphs as well.
    
    This has the benefit of layout compatibility, but it breaks the
    semantics of documents when roundtripping them from Writer's document
    model to XHTML and back.
    
    Fix the problem by disabling this tweak for XHTML: the idea is that when
    it comes to paragraph / line breaks, XHTML is meant to preserve the
    semantics of the original document model, even if that results in slight
    differences in HTML rendering. So where Writer/ODT doesn't collapse
    multiple line breaks but browsers/HTML do, we apply workarounds in
    the HTML case, but not in the XHTML case.
    
    Change-Id: I55de8880503ee2d48fbd7a6af3891f2754f0d172
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/139439
    Reviewed-by: Miklos Vajna <vmik...@collabora.com>
    Tested-by: Jenkins
    Signed-off-by: Xisco Fauli <xiscofa...@libreoffice.org>
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/139465

diff --git a/sw/CppunitTest_sw_filter_html.mk b/sw/CppunitTest_sw_filter_html.mk
new file mode 100644
index 000000000000..130afe370dc7
--- /dev/null
+++ b/sw/CppunitTest_sw_filter_html.mk
@@ -0,0 +1,75 @@
+# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*-
+#*************************************************************************
+#
+# This file is part of the LibreOffice project.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+#*************************************************************************
+
+$(eval $(call gb_CppunitTest_CppunitTest,sw_filter_html))
+
+$(eval $(call gb_CppunitTest_use_common_precompiled_header,sw_filter_html))
+
+$(eval $(call gb_CppunitTest_add_exception_objects,sw_filter_html, \
+    sw/qa/filter/html/html \
+))
+
+$(eval $(call gb_CppunitTest_use_libraries,sw_filter_html, \
+    comphelper \
+    cppu \
+    cppuhelper \
+    editeng \
+    sal \
+    sfx \
+    svl \
+    svx \
+    svxcore \
+    sw \
+    swqahelper \
+    test \
+    unotest \
+    utl \
+    vcl \
+    tl \
+))
+
+$(eval $(call gb_CppunitTest_use_externals,sw_filter_html,\
+    boost_headers \
+    libxml2 \
+))
+
+$(eval $(call gb_CppunitTest_set_include,sw_filter_html,\
+    -I$(SRCDIR)/sw/inc \
+    -I$(SRCDIR)/sw/source/core/inc \
+    -I$(SRCDIR)/sw/source/uibase/inc \
+    -I$(SRCDIR)/sw/qa/inc \
+    $$(INCLUDE) \
+))
+
+$(eval $(call gb_CppunitTest_use_api,sw_filter_html,\
+       udkapi \
+       offapi \
+       oovbaapi \
+))
+
+$(eval $(call gb_CppunitTest_use_ure,sw_filter_html))
+$(eval $(call gb_CppunitTest_use_vcl,sw_filter_html))
+
+$(eval $(call gb_CppunitTest_use_rdb,sw_filter_html,services))
+
+$(eval $(call gb_CppunitTest_use_custom_headers,sw_filter_html,\
+    officecfg/registry \
+))
+
+$(eval $(call gb_CppunitTest_use_configuration,sw_filter_html))
+
+$(eval $(call gb_CppunitTest_use_uiconfigs,sw_filter_html, \
+    modules/swriter \
+))
+
+$(eval $(call gb_CppunitTest_use_more_fonts,sw_filter_html))
+
+# vim: set noet sw=4 ts=4:
diff --git a/sw/Module_sw.mk b/sw/Module_sw.mk
index 0672ce372abe..951b620aba5c 100644
--- a/sw/Module_sw.mk
+++ b/sw/Module_sw.mk
@@ -149,6 +149,7 @@ $(eval $(call gb_Module_add_slowcheck_targets,sw,\
     CppunitTest_sw_core_view \
     CppunitTest_sw_core_attr \
     CppunitTest_sw_filter_ww8 \
+    CppunitTest_sw_filter_html \
 ))
 
 ifneq ($(DISABLE_GUI),TRUE)
diff --git a/sw/qa/filter/html/data/empty-paragraph.xhtml b/sw/qa/filter/html/data/empty-paragraph.xhtml
new file mode 100644
index 000000000000..2a4ba3f65459
--- /dev/null
+++ b/sw/qa/filter/html/data/empty-paragraph.xhtml
@@ -0,0 +1,3 @@
+<reqif-xhtml:div><reqif-xhtml:p>a</reqif-xhtml:p>
+<reqif-xhtml:p></reqif-xhtml:p>
+</reqif-xhtml:div>
diff --git a/sw/qa/filter/html/html.cxx b/sw/qa/filter/html/html.cxx
new file mode 100644
index 000000000000..1b75903383d0
--- /dev/null
+++ b/sw/qa/filter/html/html.cxx
@@ -0,0 +1,54 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include <swmodeltestbase.hxx>
+
+#include <comphelper/propertyvalue.hxx>
+
+namespace
+{
+constexpr OUStringLiteral DATA_DIRECTORY = u"/sw/qa/filter/html/data/";
+
+/**
+ * Covers sw/source/filter/html/ fixes.
+ *
+ * Note that these tests are meant to be simple: either load a file and assert some result or build
+ * a document model with code, export and assert that result.
+ *
+ * Keep using the various sw_<format>import/export suites for multiple filter calls inside a single
+ * test.
+ */
+class Test : public SwModelTestBase
+{
+};
+
+CPPUNIT_TEST_FIXTURE(Test, testEmptyParagraph)
+{
+    // Given a document with 2 paragraphs, the second is empty:
+    OUString aURL = m_directories.getURLFromSrc(DATA_DIRECTORY) + "empty-paragraph.xhtml";
+    uno::Sequence<beans::PropertyValue> aLoadArgs = {
+        comphelper::makePropertyValue("FilterName", OUString("HTML (StarWriter)")),
+        comphelper::makePropertyValue("FilterOptions", OUString("xhtmlns=reqif-xhtml")),
+    };
+
+    // When loading that file:
+    mxComponent = loadFromDesktop(aURL, OUString(), aLoadArgs);
+
+    // Then make sure that the resulting document has a 2nd empty paragraph:
+    getParagraph(1, "a");
+    // Without the accompanying fix in place, this test would have failed with:
+    // An uncaught exception of type com.sun.star.container.NoSuchElementException
+    // i.e. the 2nd paragraph was lost.
+    getParagraph(2);
+}
+}
+
+CPPUNIT_PLUGIN_IMPLEMENT();
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sw/source/filter/html/swhtml.cxx b/sw/source/filter/html/swhtml.cxx
index 80d466660d83..e76421579e9f 100644
--- a/sw/source/filter/html/swhtml.cxx
+++ b/sw/source/filter/html/swhtml.cxx
@@ -4032,10 +4032,11 @@ void SwHTMLParser::EndPara( bool bReal )
 #endif
     }
 
-    // Netscape skips empty paragraphs, we do the same.
+    // Netscape skips empty paragraphs, we do the same; unless in XHTML mode, which prefers mapping
+    // the source document to the doc model 1:1 if possible.
     if( bReal )
     {
-        if( m_pPam->GetPoint()->nContent.GetIndex() )
+        if( m_pPam->GetPoint()->nContent.GetIndex() || m_bXHTML)
             AppendTextNode( AM_SPACE );
         else
             AddParSpace();

Reply via email to