sw/CppunitTest_sw_filter_html.mk             |   75 +++++++++++++++++++++++++++
 sw/Module_sw.mk                              |    1 
 sw/qa/filter/html/data/empty-paragraph.xhtml |    3 +
 sw/qa/filter/html/html.cxx                   |   54 +++++++++++++++++++
 sw/source/filter/html/swhtml.cxx             |    5 +
 5 files changed, 136 insertions(+), 2 deletions(-)

New commits:
commit 16ed6110313cae310799a82294fc566ce75855a4
Author:     Miklos Vajna <vmik...@collabora.com>
AuthorDate: Mon Sep 5 16:15:21 2022 +0200
Commit:     Miklos Vajna <vmik...@collabora.com>
CommitDate: Mon Sep 5 21:00:44 2022 +0200

    sw XHTML import: fix lost empty paragraphs
    
    Plain HTML import ignores empty paragraphs, because browsers ignore such
    paragraphs as well.
    
    This has the benefit of layout compatibility, but it breaks the
    semantics of documents when roundtripping them from Writer's document
    model to XHTML and back.
    
    Fix the problem by disabling this tweak for XHTML: the idea is that when
    it comes to paragraph / line breaks, XHTML is meant to preserve the
    semantics of the original document model, even if that results in slight
    differences in HTML rendering. So in case Writer/ODT doesn't collapse
    multiple line breaks and browsers/HTML does that, we apply workarounds
    in the HTML case, but not in the XHTML case.
    
    Change-Id: I55de8880503ee2d48fbd7a6af3891f2754f0d172
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/139439
    Reviewed-by: Miklos Vajna <vmik...@collabora.com>
    Tested-by: Jenkins

diff --git a/sw/CppunitTest_sw_filter_html.mk b/sw/CppunitTest_sw_filter_html.mk
new file mode 100644
index 000000000000..130afe370dc7
--- /dev/null
+++ b/sw/CppunitTest_sw_filter_html.mk
@@ -0,0 +1,75 @@
+# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*-
+#*************************************************************************
+#
+# This file is part of the LibreOffice project.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+#*************************************************************************
+
+$(eval $(call gb_CppunitTest_CppunitTest,sw_filter_html))
+
+$(eval $(call gb_CppunitTest_use_common_precompiled_header,sw_filter_html))
+
+$(eval $(call gb_CppunitTest_add_exception_objects,sw_filter_html, \
+    sw/qa/filter/html/html \
+))
+
+$(eval $(call gb_CppunitTest_use_libraries,sw_filter_html, \
+    comphelper \
+    cppu \
+    cppuhelper \
+    editeng \
+    sal \
+    sfx \
+    svl \
+    svx \
+    svxcore \
+    sw \
+    swqahelper \
+    test \
+    unotest \
+    utl \
+    vcl \
+    tl \
+))
+
+$(eval $(call gb_CppunitTest_use_externals,sw_filter_html,\
+    boost_headers \
+    libxml2 \
+))
+
+$(eval $(call gb_CppunitTest_set_include,sw_filter_html,\
+    -I$(SRCDIR)/sw/inc \
+    -I$(SRCDIR)/sw/source/core/inc \
+    -I$(SRCDIR)/sw/source/uibase/inc \
+    -I$(SRCDIR)/sw/qa/inc \
+    $$(INCLUDE) \
+))
+
+$(eval $(call gb_CppunitTest_use_api,sw_filter_html,\
+       udkapi \
+       offapi \
+       oovbaapi \
+))
+
+$(eval $(call gb_CppunitTest_use_ure,sw_filter_html))
+$(eval $(call gb_CppunitTest_use_vcl,sw_filter_html))
+
+$(eval $(call gb_CppunitTest_use_rdb,sw_filter_html,services))
+
+$(eval $(call gb_CppunitTest_use_custom_headers,sw_filter_html,\
+    officecfg/registry \
+))
+
+$(eval $(call gb_CppunitTest_use_configuration,sw_filter_html))
+
+$(eval $(call gb_CppunitTest_use_uiconfigs,sw_filter_html, \
+    modules/swriter \
+))
+
+$(eval $(call gb_CppunitTest_use_more_fonts,sw_filter_html))
+
+# vim: set noet sw=4 ts=4:
diff --git a/sw/Module_sw.mk b/sw/Module_sw.mk
index a784af17d121..4c7a9d4dbbaa 100644
--- a/sw/Module_sw.mk
+++ b/sw/Module_sw.mk
@@ -151,6 +151,7 @@ $(eval $(call gb_Module_add_slowcheck_targets,sw,\
     CppunitTest_sw_core_view \
     CppunitTest_sw_core_attr \
     CppunitTest_sw_filter_ww8 \
+    CppunitTest_sw_filter_html \
     CppunitTest_sw_a11y \
 ))
 
diff --git a/sw/qa/filter/html/data/empty-paragraph.xhtml b/sw/qa/filter/html/data/empty-paragraph.xhtml
new file mode 100644
index 000000000000..2a4ba3f65459
--- /dev/null
+++ b/sw/qa/filter/html/data/empty-paragraph.xhtml
@@ -0,0 +1,3 @@
+<reqif-xhtml:div><reqif-xhtml:p>a</reqif-xhtml:p>
+<reqif-xhtml:p></reqif-xhtml:p>
+</reqif-xhtml:div>
diff --git a/sw/qa/filter/html/html.cxx b/sw/qa/filter/html/html.cxx
new file mode 100644
index 000000000000..1b75903383d0
--- /dev/null
+++ b/sw/qa/filter/html/html.cxx
@@ -0,0 +1,54 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include <swmodeltestbase.hxx>
+
+#include <comphelper/propertyvalue.hxx>
+
+namespace
+{
+constexpr OUStringLiteral DATA_DIRECTORY = u"/sw/qa/filter/html/data/";
+
+/**
+ * Covers sw/source/filter/html/ fixes.
+ *
+ * Note that these tests are meant to be simple: either load a file and assert some result or build
+ * a document model with code, export and assert that result.
+ *
+ * Keep using the various sw_<format>import/export suites for multiple filter calls inside a single
+ * test.
+ */
+class Test : public SwModelTestBase
+{
+};
+
+CPPUNIT_TEST_FIXTURE(Test, testEmptyParagraph)
+{
+    // Given a document with 2 paragraphs, the second is empty:
+    OUString aURL = m_directories.getURLFromSrc(DATA_DIRECTORY) + "empty-paragraph.xhtml";
+    uno::Sequence<beans::PropertyValue> aLoadArgs = {
+        comphelper::makePropertyValue("FilterName", OUString("HTML (StarWriter)")),
+        comphelper::makePropertyValue("FilterOptions", OUString("xhtmlns=reqif-xhtml")),
+    };
+
+    // When loading that file:
+    mxComponent = loadFromDesktop(aURL, OUString(), aLoadArgs);
+
+    // Then make sure that the resulting document has a 2nd empty paragraph:
+    getParagraph(1, "a");
+    // Without the accompanying fix in place, this test would have failed with:
+    // An uncaught exception of type com.sun.star.container.NoSuchElementException
+    // i.e. the 2nd paragraph was lost.
+    getParagraph(2);
+}
+}
+
+CPPUNIT_PLUGIN_IMPLEMENT();
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sw/source/filter/html/swhtml.cxx b/sw/source/filter/html/swhtml.cxx
index 9fcb2e0032dd..fa45f91406e3 100644
--- a/sw/source/filter/html/swhtml.cxx
+++ b/sw/source/filter/html/swhtml.cxx
@@ -4030,10 +4030,11 @@ void SwHTMLParser::EndPara( bool bReal )
 #endif
     }
 
-    // Netscape skips empty paragraphs, we do the same.
+    // Netscape skips empty paragraphs, we do the same; unless in XHTML mode, which prefers mapping
+    // the source document to the doc model 1:1 if possible.
     if( bReal )
     {
-        if( m_pPam->GetPoint()->GetContentIndex() )
+        if (m_pPam->GetPoint()->GetContentIndex() || m_bXHTML)
             AppendTextNode( AM_SPACE );
         else
             AddParSpace();

Reply via email to