external/python3/0001-3.6-closes-bpo-42938-Replace-snprintf-with-Python-un.patch.1
 |  175 ++++++++++
 external/python3/UnpackedTarball_python3.mk                                    
    |    1 
 shell/source/win32/SysShExec.cxx                                               
    |   32 +
 svtools/source/svhtml/parhtml.cxx                                              
    |   19 -
 sw/qa/core/data/ww5/pass/ofz18526-1.doc                                        
    |binary
 sw/source/core/unocore/unocrsrhelper.cxx                                       
    |   12 
 sw/source/filter/ww8/ww8par.cxx                                                
    |   56 ++-
 sw/source/filter/ww8/ww8par.hxx                                                
    |    2 
 sw/source/filter/ww8/ww8par5.cxx                                               
    |   37 +-
 writerfilter/source/rtftok/rtfdocumentimpl.cxx                                 
    |   25 +
 xmlsecurity/source/xmlsec/mscrypt/securityenvironment_mscryptimpl.cxx          
    |    4 
 11 files changed, 329 insertions(+), 34 deletions(-)

New commits:
commit 1ffc3deeba649b04186f9fed61d19afd8d8affbc
Author:     Caolán McNamara <caol...@redhat.com>
AuthorDate: Mon Feb 8 17:05:28 2021 +0000
Commit:     Gabor Kelemen <kelemen.gab...@nisz.hu>
CommitDate: Tue Oct 19 16:16:57 2021 +0200

    default to CertificateValidity::INVALID
    
    so if CertGetCertificateChain fails we don't want validity to be
    css::security::CertificateValidity::VALID which is what the old default
    of 0 equates to
    
    notably
    
    commit 1e0bc66d16aee28ce8bd9582ea32178c63841902
    Date:   Thu Nov 5 16:55:26 2009 +0100
    
        jl137:  #103420# better logging
    
    turned the nss equivalent of SecurityEnvironment_NssImpl::verifyCertificate
    from 0 to CertificateValidity::INVALID like this change does
    
    Change-Id: I5350dbc22d1b9b378da2976d3b0abd728f1f4c27
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/110561
    Tested-by: Jenkins
    Reviewed-by: Miklos Vajna <vmik...@collabora.com>
    (cherry picked from commit edeb164c1d8ab64116afee4e2140403a362a1358)
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/113090
    Tested-by: Michael Stahl <michael.st...@allotropia.de>
    Reviewed-by: Michael Stahl <michael.st...@allotropia.de>

diff --git a/xmlsecurity/source/xmlsec/mscrypt/securityenvironment_mscryptimpl.cxx b/xmlsecurity/source/xmlsec/mscrypt/securityenvironment_mscryptimpl.cxx
index 55b732987d0f..75758d8f9ca3 100644
--- a/xmlsecurity/source/xmlsec/mscrypt/securityenvironment_mscryptimpl.cxx
+++ b/xmlsecurity/source/xmlsec/mscrypt/securityenvironment_mscryptimpl.cxx
@@ -800,7 +800,7 @@ sal_Int32 
SecurityEnvironment_MSCryptImpl::verifyCertificate(
     const Reference< css::security::XCertificate >& aCert,
     const Sequence< Reference< css::security::XCertificate > >& seqCerts)
 {
-    sal_Int32 validity = 0;
+    sal_Int32 validity = css::security::CertificateValidity::INVALID;
     PCCERT_CHAIN_CONTEXT pChainContext = nullptr;
     PCCERT_CONTEXT pCertContext = nullptr;
 
@@ -945,7 +945,7 @@ sal_Int32 
SecurityEnvironment_MSCryptImpl::verifyCertificate(
         }
         else
         {
-            SAL_INFO("xmlsecurity.xmlsec", "CertGetCertificateChaine failed.");
+            SAL_INFO("xmlsecurity.xmlsec", "CertGetCertificateChain failed.");
         }
     }
 
commit d10a2750e0b36041beb5f623388ccaaf47a4591d
Author:     Stephan Bergmann <sberg...@redhat.com>
AuthorDate: Tue Feb 16 09:30:09 2021 +0100
Commit:     Gabor Kelemen <kelemen.gab...@nisz.hu>
CommitDate: Tue Oct 19 16:16:51 2021 +0200

    Improve checkExtension
    
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/110970
    Reviewed-by: Mike Kaganski <mike.kagan...@collabora.com>
    Tested-by: Jenkins
    (cherry picked from commit f456c4dacf700e064e112ef068ff7edb04239754)
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/110922
    Reviewed-by: Michael Stahl <michael.st...@allotropia.de>
    (cherry picked from commit f19d95986756412e5d72047656eec17a720c5e57)
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/113088
    Tested-by: Michael Stahl <michael.st...@allotropia.de>
    
    Change-Id: Iff416a9c5930ad5903f7ee51a2abbc94d5f40800

diff --git a/shell/source/win32/SysShExec.cxx b/shell/source/win32/SysShExec.cxx
index a9e5a2c4ec7d..e90b68474486 100644
--- a/shell/source/win32/SysShExec.cxx
+++ b/shell/source/win32/SysShExec.cxx
@@ -377,21 +377,29 @@ void SAL_CALL CSysShExec::execute( const OUString& 
aCommand, const OUString& aPa
                         {}, 0);
                 }
             }
+            pathname = o3tl::toU(path);
+            // ShellExecuteExW appears to ignore trailing dots, so remove them:
+            while (pathname.endsWith(".", &pathname)) {}
             auto const n = pathname.lastIndexOf('.');
             if (n > pathname.lastIndexOf('\\')) {
                 auto const ext = pathname.copy(n + 1);
-                OUString env;
-                if (osl_getEnvironment(OUString("PATHEXT").pData, &env.pData) 
!= osl_Process_E_None)
-                {
-                    SAL_INFO("shell", "osl_getEnvironment(PATHEXT) failed");
-                }
-                if (!(checkExtension(ext, env)
-                      && checkExtension(
-                          ext,
-                          
".COM;.EXE;.BAT;.CMD;.VBS;.VBE;.JS;.JSE;.WSF;.WSH;.MSC;.PY;.CLASS;.JAR")))
-                {
-                    throw css::lang::IllegalArgumentException(
-                        "XSystemShellExecute.execute, cannot process <" + 
aCommand + ">", {}, 0);
+                if (!ext.isEmpty()) {
+                    OUString env;
+                    if (osl_getEnvironment(OUString("PATHEXT").pData, 
&env.pData)
+                        != osl_Process_E_None)
+                    {
+                        SAL_INFO("shell", "osl_getEnvironment(PATHEXT) failed");
+                    }
+                    if (!(checkExtension(ext, env)
+                          && checkExtension(
+                              ext,
+                              
".COM;.EXE;.BAT;.CMD;.VBS;.VBE;.JS;.JSE;.WSF;.WSH;.MSC;.PY;.CLASS;"
+                                  ".JAR;.APPLICATION;.LNK;.SCR")))
+                    {
+                        throw css::lang::IllegalArgumentException(
+                            "XSystemShellExecute.execute, cannot process <" + 
aCommand + ">", {},
+                            0);
+                    }
                 }
             }
         }
commit 6955dacbdacf479aa7dfa5f1f920963c47d96eeb
Author:     Michael Stahl <michael.st...@allotropia.de>
AuthorDate: Wed Feb 17 12:24:08 2021 +0100
Commit:     Gabor Kelemen <kelemen.gab...@nisz.hu>
CommitDate: Tue Oct 19 14:23:22 2021 +0200

    python3: add patch for CVE-2021-3177
    
    Looks like Python 3.5 is EOL, so backport the patch.
    
    Change-Id: I9ba397b3ed7e5f4ee4f78b144d822ce260ca9fb4
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/111059
    Tested-by: Michael Stahl <michael.st...@allotropia.de>
    Reviewed-by: Michael Stahl <michael.st...@allotropia.de>

diff --git a/external/python3/0001-3.6-closes-bpo-42938-Replace-snprintf-with-Python-un.patch.1 b/external/python3/0001-3.6-closes-bpo-42938-Replace-snprintf-with-Python-un.patch.1
new file mode 100644
index 000000000000..fdcc5cb65267
--- /dev/null
+++ b/external/python3/0001-3.6-closes-bpo-42938-Replace-snprintf-with-Python-un.patch.1
@@ -0,0 +1,175 @@
+From 34df10a9a16b38d54421eeeaf73ec89828563be7 Mon Sep 17 00:00:00 2001
+From: Benjamin Peterson <benja...@python.org>
+Date: Mon, 18 Jan 2021 15:11:46 -0600
+Subject: [PATCH] [3.6] closes bpo-42938: Replace snprintf with Python unicode
+ formatting in ctypes param reprs. (GH-24250)
+
+(cherry picked from commit 916610ef90a0d0761f08747f7b0905541f0977c7)
+
+Co-authored-by: Benjamin Peterson <benja...@python.org>
+---
+ Lib/ctypes/test/test_parameters.py            | 43 +++++++++++++++
+ .../2021-01-18-09-27-31.bpo-42938.4Zn4Mp.rst  |  2 +
+ Modules/_ctypes/callproc.c                    | 55 +++++++------------
+ 3 files changed, 66 insertions(+), 34 deletions(-)
+ create mode 100644 Misc/NEWS.d/next/Security/2021-01-18-09-27-31.bpo-42938.4Zn4Mp.rst
+
+diff --git a/Lib/ctypes/test/test_parameters.py b/Lib/ctypes/test/test_parameters.py
+index e4c25fd880..531894fdec 100644
+--- a/Lib/ctypes/test/test_parameters.py
++++ b/Lib/ctypes/test/test_parameters.py
+@@ -201,6 +201,49 @@ def __dict__(self):
+         self.assertRaises(ArgumentError, func, 99)
+ 
+ 
++    def test_parameter_repr(self):
++        from ctypes import (
++            c_bool,
++            c_char,
++            c_wchar,
++            c_byte,
++            c_ubyte,
++            c_short,
++            c_ushort,
++            c_int,
++            c_uint,
++            c_long,
++            c_ulong,
++            c_longlong,
++            c_ulonglong,
++            c_float,
++            c_double,
++            c_longdouble,
++            c_char_p,
++            c_wchar_p,
++            c_void_p,
++        )
++        self.assertRegex(repr(c_bool.from_param(True)), r"^<cparam '\?' at 0x[A-Fa-f0-9]+>$")
++        self.assertEqual(repr(c_char.from_param(97)), "<cparam 'c' ('a')>")
++        self.assertRegex(repr(c_wchar.from_param('a')), r"^<cparam 'u' at 0x[A-Fa-f0-9]+>$")
++        self.assertEqual(repr(c_byte.from_param(98)), "<cparam 'b' (98)>")
++        self.assertEqual(repr(c_ubyte.from_param(98)), "<cparam 'B' (98)>")
++        self.assertEqual(repr(c_short.from_param(511)), "<cparam 'h' (511)>")
++        self.assertEqual(repr(c_ushort.from_param(511)), "<cparam 'H' (511)>")
++        self.assertRegex(repr(c_int.from_param(20000)), r"^<cparam '[li]' \(20000\)>$")
++        self.assertRegex(repr(c_uint.from_param(20000)), r"^<cparam '[LI]' \(20000\)>$")
++        self.assertRegex(repr(c_long.from_param(20000)), r"^<cparam '[li]' \(20000\)>$")
++        self.assertRegex(repr(c_ulong.from_param(20000)), r"^<cparam '[LI]' \(20000\)>$")
++        self.assertRegex(repr(c_longlong.from_param(20000)), r"^<cparam '[liq]' \(20000\)>$")
++        self.assertRegex(repr(c_ulonglong.from_param(20000)), r"^<cparam '[LIQ]' \(20000\)>$")
++        self.assertEqual(repr(c_float.from_param(1.5)), "<cparam 'f' (1.5)>")
++        self.assertEqual(repr(c_double.from_param(1.5)), "<cparam 'd' (1.5)>")
++        self.assertEqual(repr(c_double.from_param(1e300)), "<cparam 'd' (1e+300)>")
++        self.assertRegex(repr(c_longdouble.from_param(1.5)), r"^<cparam ('d' \(1.5\)|'g' at 0x[A-Fa-f0-9]+)>$")
++        self.assertRegex(repr(c_char_p.from_param(b'hihi')), "^<cparam 'z' \(0x[A-Fa-f0-9]+\)>$")
++        self.assertRegex(repr(c_wchar_p.from_param('hihi')), "^<cparam 'Z' \(0x[A-Fa-f0-9]+\)>$")
++        self.assertRegex(repr(c_void_p.from_param(0x12)), r"^<cparam 'P' \(0x0*12\)>$")
++
+ ################################################################
+ 
+ if __name__ == '__main__':
+diff --git a/Misc/NEWS.d/next/Security/2021-01-18-09-27-31.bpo-42938.4Zn4Mp.rst b/Misc/NEWS.d/next/Security/2021-01-18-09-27-31.bpo-42938.4Zn4Mp.rst
+new file mode 100644
+index 0000000000..7df65a156f
+--- /dev/null
++++ b/Misc/NEWS.d/next/Security/2021-01-18-09-27-31.bpo-42938.4Zn4Mp.rst
+@@ -0,0 +1,2 @@
++Avoid static buffers when computing the repr of :class:`ctypes.c_double` and
++:class:`ctypes.c_longdouble` values.
+diff --git a/Modules/_ctypes/callproc.c b/Modules/_ctypes/callproc.c
+index 70e416b950..9fcf95f543 100644
+--- a/Modules/_ctypes/callproc.c
++++ b/Modules/_ctypes/callproc.c
+@@ -451,54 +451,43 @@ PyCArg_dealloc(PyCArgObject *self)
+ static PyObject *
+ PyCArg_repr(PyCArgObject *self)
+ {
+-    char buffer[256];
+     switch(self->tag) {
+     case 'b':
+     case 'B':
+-        sprintf(buffer, "<cparam '%c' (%d)>",
++        return PyUnicode_FromFormat("<cparam '%c' (%d)>",
+             self->tag, self->value.b);
+-        break;
+     case 'h':
+     case 'H':
+-        sprintf(buffer, "<cparam '%c' (%d)>",
++        return PyUnicode_FromFormat("<cparam '%c' (%d)>",
+             self->tag, self->value.h);
+-        break;
+     case 'i':
+     case 'I':
+-        sprintf(buffer, "<cparam '%c' (%d)>",
++        return PyUnicode_FromFormat("<cparam '%c' (%d)>",
+             self->tag, self->value.i);
+-        break;
+     case 'l':
+     case 'L':
+-        sprintf(buffer, "<cparam '%c' (%ld)>",
++        return PyUnicode_FromFormat("<cparam '%c' (%ld)>",
+             self->tag, self->value.l);
+-        break;
+
+ #ifdef HAVE_LONG_LONG
+     case 'q':
+     case 'Q':
+-        sprintf(buffer,
+-#ifdef MS_WIN32
+-            "<cparam '%c' (%I64d)>",
+-#else
+-            "<cparam '%c' (%qd)>",
+-#endif
++        return PyUnicode_FromFormat("<cparam '%c' (%qd)>",
+             self->tag, self->value.q);
+-        break;
+ #endif
+     case 'd':
+-        sprintf(buffer, "<cparam '%c' (%f)>",
+-            self->tag, self->value.d);
+-        break;
+-    case 'f':
+-        sprintf(buffer, "<cparam '%c' (%f)>",
+-            self->tag, self->value.f);
+-        break;
+-
++    case 'f': {
+        PyObject *f = PyFloat_FromDouble((self->tag == 'f') ? self->value.f : self->value.d);
++        if (f == NULL) {
++            return NULL;
++        }
+        { PyObject *result = PyUnicode_FromFormat("<cparam '%c' (%R)>", self->tag, f);
++          Py_DECREF(f);
++          return result; }
++    }
+     case 'c':
+-        sprintf(buffer, "<cparam '%c' (%c)>",
++        return PyUnicode_FromFormat("<cparam '%c' ('%c')>",
+             self->tag, self->value.c);
+-        break;
+
+ /* Hm, are these 'z' and 'Z' codes useful at all?
+    Shouldn't they be replaced by the functionality of c_string
+@@ -507,16 +495,14 @@ PyCArg_repr(PyCArgObject *self)
+     case 'z':
+     case 'Z':
+     case 'P':
+-        sprintf(buffer, "<cparam '%c' (%p)>",
++        return PyUnicode_FromFormat("<cparam '%c' (%p)>",
+             self->tag, self->value.p);
+         break;
+
+     default:
+-        sprintf(buffer, "<cparam '%c' at %p>",
+-            self->tag, self);
+-        break;
++        return PyUnicode_FromFormat("<cparam '%c' at %p>",
++            (unsigned char)self->tag, (void *)self);
+     }
+-    return PyUnicode_FromString(buffer);
+ }
+
+ static PyMemberDef PyCArgType_members[] = {
+-- 
+2.29.2
+
diff --git a/external/python3/UnpackedTarball_python3.mk 
b/external/python3/UnpackedTarball_python3.mk
index ee99de1f5e0c..81a392f76f48 100644
--- a/external/python3/UnpackedTarball_python3.mk
+++ b/external/python3/UnpackedTarball_python3.mk
@@ -26,6 +26,7 @@ $(eval $(call gb_UnpackedTarball_add_patches,python3,\
        external/python3/ubsan.patch.0 \
        external/python3/python-3.5.tweak.strip.soabi.patch \
        
external/python3/0001-3.6-bpo-17239-Disable-external-entities-in-SAX-parse.patch.1
 \
+       external/python3/0001-3.6-closes-bpo-42938-Replace-snprintf-with-Python-un.patch.1 \
 ))
 
 ifneq ($(filter DRAGONFLY FREEBSD LINUX NETBSD OPENBSD SOLARIS,$(OS)),)
commit 586d2c59df0ee87dccdc237e99691f7d5b12c393
Author:     Michael Stahl <michael.st...@cib.de>
AuthorDate: Wed Oct 30 16:06:02 2019 +0100
Commit:     Gabor Kelemen <kelemen.gab...@nisz.hu>
CommitDate: Tue Oct 19 13:57:07 2021 +0200

    sw: WW8 import: filter control characters in GetFieldResult()
    
    Triggers the assert in SwSubFont::GetTextSize_() on ooo58234-1.doc,
    which has a field result with ^G cell separators that is converted to
    SwInputField, which inserts the field result into SwTextNode.
    
    Change-Id: Ibdb93390862a11462d62cf744bac912d6009777e
    Reviewed-on: https://gerrit.libreoffice.org/81788
    Tested-by: Jenkins
    Reviewed-by: Michael Stahl <michael.st...@cib.de>
    (cherry picked from commit 3a9d504b01c061f60a915b5681c8313859294118)

diff --git a/sw/source/filter/ww8/ww8par5.cxx b/sw/source/filter/ww8/ww8par5.cxx
index d4ced6eec07b..d2ab475b3630 100644
--- a/sw/source/filter/ww8/ww8par5.cxx
+++ b/sw/source/filter/ww8/ww8par5.cxx
@@ -32,6 +32,7 @@
 #include <com/sun/star/task/InteractionHandler.hpp>
 
 #include <com/sun/star/ucb/XCommandEnvironment.hpp>
+#include <svl/lngmisc.hxx>
 #include <svl/urihelper.hxx>
 #include <svl/zforlist.hxx>
 #include <svl/zformat.hxx>
@@ -1192,7 +1193,35 @@ OUString SwWW8ImplReader::GetFieldResult( WW8FieldDesc* 
pF )
     m_pStrm->Seek( nOldPos );
 
     //replace both CR 0x0D and VT 0x0B with LF 0x0A
-    return sRes.replace(0x0D, 0x0A).replace(0x0B, 0x0A);
+    // at least in the cases where the result is added to an SwInputField
+    // there must not be control characters in it
+    OUStringBuffer buf(sRes.getLength());
+    for (sal_Int32 i = 0; i < sRes.getLength(); ++i)
+    {
+        sal_Unicode const ch(sRes[i]);
+        if (!linguistic::IsControlChar(ch))
+        {
+            buf.append(ch);
+        }
+        else
+        {
+            switch (ch)
+            {
+                case 0x0B:
+                case '\r':
+                    buf.append('\n');
+                    break;
+                case '\n':
+                case '\t':
+                    buf.append(ch);
+                    break;
+                default:
+                    SAL_INFO("sw.ww8", "GetFieldResult(): filtering control character");
+                    break;
+            }
+        }
+    }
+    return buf.makeStringAndClear();
 }
 
 /*
commit c8ee4b2347ddce1427dc2999e7e13c97a0e917cb
Author:     Michael Stahl <michael.st...@cib.de>
AuthorDate: Tue Nov 12 18:57:58 2019 +0100
Commit:     Gabor Kelemen <kelemen.gab...@nisz.hu>
CommitDate: Tue Oct 19 12:38:39 2021 +0200

    ofz#18526 sw: WW8 import: don't insert control characters
    
    Sanitize string before calling InsertString().
    
    This segfaults since:
    
    commit b522fc0646915d4da94df38dd249c88b28f25be7
    Date:   Tue Sep 24 18:11:45 2019 +0200
    
        sw: maintain fieldmarks in DeleteRange()/DeleteAndJoin()/ReplaceRange()
    
    Reviewed-on: https://gerrit.libreoffice.org/81949
    Tested-by: Jenkins
    Reviewed-by: Caolán McNamara <caol...@redhat.com>
    Tested-by: Caolán McNamara <caol...@redhat.com>
    (cherry picked from commit 7ecda38cdaa2361e8510bf3e7206863c4936deab)
    Reviewed-on: https://gerrit.libreoffice.org/82759
    (cherry picked from commit d494a4c0ead7db481757d8d67fbce9e1b02e65df)
    
    Change-Id: I9ef73d924420686f6838fa21900ec57b4d25c905

diff --git a/sw/qa/core/data/ww5/pass/ofz18526-1.doc 
b/sw/qa/core/data/ww5/pass/ofz18526-1.doc
new file mode 100644
index 000000000000..e651650f9a26
Binary files /dev/null and b/sw/qa/core/data/ww5/pass/ofz18526-1.doc differ
diff --git a/sw/source/filter/ww8/ww8par.cxx b/sw/source/filter/ww8/ww8par.cxx
index a4f0817b8216..ca9065f14acc 100644
--- a/sw/source/filter/ww8/ww8par.cxx
+++ b/sw/source/filter/ww8/ww8par.cxx
@@ -108,6 +108,8 @@
 #include <com/sun/star/document/XDocumentPropertiesSupplier.hpp>
 #include <com/sun/star/document/XViewDataSupplier.hpp>
 #include <com/sun/star/document/IndexedPropertyValues.hpp>
+
+#include <svl/lngmisc.hxx>
 #include <svl/itemiter.hxx>
 
 #include <comphelper/processfactory.hxx>
@@ -3346,14 +3348,38 @@ void 
SwWW8ImplReader::emulateMSWordAddTextToParagraph(const OUString& rAddString
     }
 }
 
+namespace sw {
+
+auto FilterControlChars(OUString const& rString) -> OUString // returns rString minus the filtered control characters
+{
+    OUStringBuffer buf(rString.getLength());
+    for (sal_Int32 i = 0; i < rString.getLength(); ++i)
+    {
+        sal_Unicode const ch(rString[i]);
+        if (!linguistic::IsControlChar(ch) || ch == '\r' || ch == '\n' || ch 
== '\t') // keep non-control characters, plus CR, LF and TAB
+        {
+            buf.append(ch);
+        }
+        else
+        {
+            SAL_INFO("sw.ww8", "filtering control character"); // dropped: never appended to buf
+        }
+    }
+    return buf.makeStringAndClear();
+}
+
+} // namespace sw
+
 void SwWW8ImplReader::simpleAddTextToParagraph(const OUString& rAddString)
 {
-    if (rAddString.isEmpty())
+    OUString const addString(sw::FilterControlChars(rAddString));
+
+    if (addString.isEmpty())
         return;
 
 #if OSL_DEBUG_LEVEL > 1
         {
-            OString sText(OUStringToOString(rAddString, 
RTL_TEXTENCODING_UTF8));
+            OString sText(OUStringToOString(addString, RTL_TEXTENCODING_UTF8));
             SAL_INFO("sw.ww8", "<addTextToParagraph>" << sText.getStr() << 
"</addTextToParagraph>");
         }
 #endif
@@ -3369,21 +3395,21 @@ void SwWW8ImplReader::simpleAddTextToParagraph(const 
OUString& rAddString)
     const sal_Int32 nCharsLeft = SAL_MAX_INT32 - pNd->GetText().getLength();
     if (nCharsLeft > 0)
     {
-        if (rAddString.getLength() <= nCharsLeft)
+        if (addString.getLength() <= nCharsLeft)
         {
-            m_rDoc.getIDocumentContentOperations().InsertString(*m_pPaM, 
rAddString);
+            m_rDoc.getIDocumentContentOperations().InsertString(*m_pPaM, 
addString);
         }
         else
         {
-            m_rDoc.getIDocumentContentOperations().InsertString(*m_pPaM, 
rAddString.copy(0, nCharsLeft));
+            m_rDoc.getIDocumentContentOperations().InsertString(*m_pPaM, 
addString.copy(0, nCharsLeft));
             AppendTextNode(*m_pPaM->GetPoint());
-            m_rDoc.getIDocumentContentOperations().InsertString(*m_pPaM, 
rAddString.copy(nCharsLeft));
+            m_rDoc.getIDocumentContentOperations().InsertString(*m_pPaM, 
addString.copy(nCharsLeft));
         }
     }
     else
     {
         AppendTextNode(*m_pPaM->GetPoint());
-        m_rDoc.getIDocumentContentOperations().InsertString(*m_pPaM, 
rAddString);
+        m_rDoc.getIDocumentContentOperations().InsertString(*m_pPaM, 
addString);
     }
 
     m_bReadTable = false;
@@ -3409,13 +3435,17 @@ bool SwWW8ImplReader::ReadChars(WW8_CP& rPos, WW8_CP 
nNextAttr, long nTextEnd,
                 nRequested = nMaxPossible;
             }
 
-            for (WW8_CP nCh = 0; nCh < nRequested; ++nCh)
+            if (!linguistic::IsControlChar(m_cSymbol)
+                || m_cSymbol == '\r' || m_cSymbol == '\n' || m_cSymbol == '\t')
             {
-                m_rDoc.getIDocumentContentOperations().InsertString( *m_pPaM, 
OUString(m_cSymbol) );
+                for (WW8_CP nCh = 0; nCh < nRequested; ++nCh)
+                {
+                    
m_rDoc.getIDocumentContentOperations().InsertString(*m_pPaM, 
OUString(m_cSymbol));
+                }
+                m_pCtrlStck->SetAttr(*m_pPaM->GetPoint(), RES_CHRATR_FONT);
+                m_pCtrlStck->SetAttr(*m_pPaM->GetPoint(), RES_CHRATR_CJK_FONT);
+                m_pCtrlStck->SetAttr(*m_pPaM->GetPoint(), RES_CHRATR_CTL_FONT);
             }
-            m_pCtrlStck->SetAttr( *m_pPaM->GetPoint(), RES_CHRATR_FONT );
-            m_pCtrlStck->SetAttr( *m_pPaM->GetPoint(), RES_CHRATR_CJK_FONT );
-            m_pCtrlStck->SetAttr( *m_pPaM->GetPoint(), RES_CHRATR_CTL_FONT );
         }
         m_pStrm->SeekRel(nRequested);
         rPos = nEnd; // Ignore until attribute end
diff --git a/sw/source/filter/ww8/ww8par.hxx b/sw/source/filter/ww8/ww8par.hxx
index b0a20a038369..a2867d40177e 100644
--- a/sw/source/filter/ww8/ww8par.hxx
+++ b/sw/source/filter/ww8/ww8par.hxx
@@ -542,6 +542,8 @@ namespace sw
             sal_Int32 GetPtContent() { return mnPtContent; };
         };
     }
+
+    auto FilterControlChars(OUString const& rString) -> OUString;
 }
 
 class WW8FieldEntry
diff --git a/sw/source/filter/ww8/ww8par5.cxx b/sw/source/filter/ww8/ww8par5.cxx
index 82efb06481fd..d4ced6eec07b 100644
--- a/sw/source/filter/ww8/ww8par5.cxx
+++ b/sw/source/filter/ww8/ww8par5.cxx
@@ -35,6 +35,7 @@
 #include <svl/urihelper.hxx>
 #include <svl/zforlist.hxx>
 #include <svl/zformat.hxx>
+#include <svl/lngmisc.hxx>
 #include <sfx2/linkmgr.hxx>
 
 #include <ucbhelper/content.hxx>
@@ -1886,7 +1887,8 @@ eF_ResT SwWW8ImplReader::Read_F_Symbol( WW8FieldDesc*, 
OUString& rStr )
     if( aQ.isEmpty() )
         return eF_ResT::TAGIGN;                      // -> no 0-char in text
 
-    if (sal_Unicode cChar = static_cast<sal_Unicode>(aQ.toInt32()))
+    sal_Unicode const cChar = static_cast<sal_Unicode>(aQ.toInt32());
+    if (!linguistic::IsControlChar(cChar) || cChar == '\r' || cChar == '\n' || 
cChar == '\t')
     {
         if (!aName.isEmpty())                           // Font Name set ?
         {
@@ -2667,11 +2669,11 @@ void SwWW8ImplReader::Read_SubF_Ruby( 
WW8ReadFieldParams& rReadParam)
                             if ((nBegin != -1) && (nEnd != -1) && (nBegin < 
nEnd))
                             {
                                 sText = sPart.copy(nBegin+1,nEnd-nBegin-1);
+                                sText = sw::FilterControlChars(sText);
                             }
                         }
                     }
                 }
-
             }
             break;
         }
commit 219499f20068ee09361c23cb1b9e3fb8f14f8c4a
Author:     Michael Stahl <michael.st...@cib.de>
AuthorDate: Thu Nov 14 17:37:17 2019 +0100
Commit:     Gabor Kelemen <kelemen.gab...@nisz.hu>
CommitDate: Tue Oct 19 11:57:43 2021 +0200

    sw: WW8 import: instead of control character insert '?' for footnote
    
    SwWW8ImplReader::ReadChar() inserts a U+0002 control character to
    temporarily mark a footnote anchor; this is then deleted and replaced
    with a real footnote hint by SwWW8ImplReader::End_Footnote().
    
    The assumption is that it is necessary to insert a placeholder
    character to be able to apply formatting to it.
    
    But if the document is corrupted, the control character could survive
    the import, which sounds less than ideal.
    
    So either make this magic character more explicit by documenting it in
    hintids.hxx and removing any outstanding ones at the end of the import,
    or use a non-offensive character instead; since this should only affect
    invalid documents, choose the solution with the least effort.
    
    Change-Id: I76d396258b32e0f0fb6393942a58a4dc57912211
    Reviewed-on: https://gerrit.libreoffice.org/82760
    Tested-by: Jenkins
    Reviewed-by: Caolán McNamara <caol...@redhat.com>
    Tested-by: Caolán McNamara <caol...@redhat.com>
    (cherry picked from commit 13ba765c444713b0b0b2f4b4231bdafcbbef6ad0)

diff --git a/sw/source/filter/ww8/ww8par.cxx b/sw/source/filter/ww8/ww8par.cxx
index cd9ba231d788..a4f0817b8216 100644
--- a/sw/source/filter/ww8/ww8par.cxx
+++ b/sw/source/filter/ww8/ww8par.cxx
@@ -3685,7 +3685,7 @@ bool SwWW8ImplReader::ReadChar(long nPosCp, long nCpOfs)
             break;
         case 0x2:               // TODO: Auto-Footnote-Number, should be 
replaced by SwWW8ImplReader::End_Footnote later
             if (!m_aFootnoteStack.empty())
-                cInsert = 0x2;
+                cInsert = '?';
             break;
         default:
             SAL_INFO( "sw.ww8.level2", "<unknownValue val=\"" << nWCharVal << 
"\">" );
commit e6b46b8281884206e878348224accffb4b145701
Author:     Michael Stahl <michael.st...@cib.de>
AuthorDate: Tue Oct 29 15:54:41 2019 +0100
Commit:     Gabor Kelemen <kelemen.gab...@nisz.hu>
CommitDate: Tue Oct 19 11:48:37 2021 +0200

    writerfilter: rtftok: filter control characters
    
    ... in RTFDocumentImpl::checkUnicode(); see ooo86460-1.xls [sic]
    for an example.
    
    There is another caller of text() in rtfdispatchdestination.cxx:311 but
    it turns out that buffered text was created by text() in the first
    place.
    
    This shouldn't be a problem for DOCX because XML 1.0 doesn't allow the
    bad control characters anyway so the sax parser should report an error
    in that case.
    
    Reviewed-on: https://gerrit.libreoffice.org/81697
    Reviewed-by: Michael Stahl <michael.st...@cib.de>
    Tested-by: Michael Stahl <michael.st...@cib.de>
    (cherry picked from commit a6516c76c01b92f7d35bfb352b63af7de42b5707)
    
    Change-Id: Ice45e1c3c8c7db668a4cfb8364e42addea1777ce

diff --git a/writerfilter/source/rtftok/rtfdocumentimpl.cxx 
b/writerfilter/source/rtftok/rtfdocumentimpl.cxx
index af8ef382b53a..dc25bc834980 100644
--- a/writerfilter/source/rtftok/rtfdocumentimpl.cxx
+++ b/writerfilter/source/rtftok/rtfdocumentimpl.cxx
@@ -34,6 +34,7 @@
 #include <comphelper/embeddedobjectcontainer.hxx>
 #include <comphelper/sequenceashashmap.hxx>
 #include <comphelper/sequence.hxx>
+#include <svl/lngmisc.hxx>
 #include <sfx2/sfxbasemodel.hxx>
 #include <sfx2/classificationhelper.hxx>
 #include <sfx2/sfx.hrc>
@@ -3191,16 +3192,40 @@ void RTFDocumentImpl::setSkipUnknown(bool bSkipUnknown)
     m_bSkipUnknown = bSkipUnknown;
 }
 
+static auto FilterControlChars(Destination const destination, OUString const& 
rString) -> OUString // returns rString, filtered unless the destination is exempt
+{
+    if (destination == Destination::LEVELNUMBERS || destination == 
Destination::LEVELTEXT)
+    { // control characters are magic here!
+        return rString; // passed through unfiltered for these two destinations
+    }
+    OUStringBuffer buf(rString.getLength());
+    for (sal_Int32 i = 0; i < rString.getLength(); ++i)
+    {
+        sal_Unicode const ch(rString[i]);
+        if (!linguistic::IsControlChar(ch) || ch == '\r' || ch == '\n' || ch 
== '\t') // keep non-control characters, plus CR, LF and TAB
+        {
+            buf.append(ch);
+        }
+        else
+        {
+            SAL_INFO("writerfilter.rtf", "filtering control character"); // dropped: never appended to buf
+        }
+    }
+    return buf.makeStringAndClear();
+}
+
 void RTFDocumentImpl::checkUnicode(bool bUnicode, bool bHex)
 {
     if (bUnicode && !m_aUnicodeBuffer.isEmpty())
     {
         OUString aString = m_aUnicodeBuffer.makeStringAndClear();
+        aString = FilterControlChars(m_aStates.top().eDestination, aString);
         text(aString);
     }
     if (bHex && !m_aHexBuffer.isEmpty())
     {
         OUString aString = 
OStringToOUString(m_aHexBuffer.makeStringAndClear(), 
m_aStates.top().nCurrentEncoding);
+        aString = FilterControlChars(m_aStates.top().eDestination, aString);
         text(aString);
     }
 }
commit ef5e893b57e9ca98236c9176438db61c90107a3d
Author:     Michael Stahl <michael.st...@cib.de>
AuthorDate: Tue Oct 29 15:52:34 2019 +0100
Commit:     Gabor Kelemen <kelemen.gab...@nisz.hu>
CommitDate: Tue Oct 19 11:03:48 2021 +0200

    sw: UNO API: do not allow inserting control characters into nodes
    
    Refuse invalid input in DocInsertStringSplitCR().
    
    Reviewed-on: https://gerrit.libreoffice.org/81696
    Tested-by: Jenkins
    Reviewed-by: Michael Stahl <michael.st...@cib.de>
    (cherry picked from commit 9b1e3e9bfdc0639630a367e45e4bdc2e9f22e503)
    
    Change-Id: I097c1b3a1f70b0cf1fa3fc33fc1d965ee6c96280

diff --git a/sw/source/core/unocore/unocrsrhelper.cxx 
b/sw/source/core/unocore/unocrsrhelper.cxx
index 71faf3dbb0bd..547598423ab7 100644
--- a/sw/source/core/unocore/unocrsrhelper.cxx
+++ b/sw/source/core/unocore/unocrsrhelper.cxx
@@ -65,6 +65,7 @@
 #include <cntfrm.hxx>
 #include <pagefrm.hxx>
 #include <svl/eitem.hxx>
+#include <svl/lngmisc.hxx>
 #include <docary.hxx>
 #include <swtable.hxx>
 #include <tox.hxx>
@@ -1095,6 +1096,17 @@ bool DocInsertStringSplitCR(
 {
     bool bOK = true;
 
+    for (sal_Int32 i = 0; i < rText.getLength(); ++i)
+    {
+        sal_Unicode const ch(rText[i]);
+        if (linguistic::IsControlChar(ch)
+            && ch != '\r' && ch != '\n' && ch != '\t')
+        {
+            SAL_WARN("sw.uno", "DocInsertStringSplitCR: refusing to insert control character " << int(ch));
+            return false;
+        }
+    }
+
         const SwInsertFlags nInsertFlags =
             bForceExpandHints
             ? ( SwInsertFlags::FORCEHINTEXPAND | SwInsertFlags::EMPTYEXPAND)
commit 2e2efeb822262ffdbd9f85790b7389453c615fd2
Author:     Michael Stahl <michael.st...@cib.de>
AuthorDate: Mon Oct 28 14:31:23 2019 +0100
Commit:     Gabor Kelemen <kelemen.gab...@nisz.hu>
CommitDate: Tue Oct 19 10:43:56 2021 +0200

    svl: HTMLParser: stop inserting control character garbage into Writer
    
    E.g. rhbz433940-1.html contains literal ^G characters that are inserted
    as-is into SwTextNodes.
    
    This now triggers assert about CH_TXT_ATR_FIELDSTART in
    SwSubFont::GetTextSize_() that was added in
    19a559b0ec9b806519c405651d6d2b2e14712b4a.
    
    Change-Id: I6aa7de41a04069e15b40865fd57894dae0fc10db
    Reviewed-on: https://gerrit.libreoffice.org/81606
    Reviewed-by: Michael Stahl <michael.st...@cib.de>
    Tested-by: Michael Stahl <michael.st...@cib.de>
    (cherry picked from commit 35d248cab1f0d4800f72abb5cb6afb56f40d9083)

diff --git a/svtools/source/svhtml/parhtml.cxx 
b/svtools/source/svhtml/parhtml.cxx
index 5f8ef05318c2..53f3626d74a3 100644
--- a/svtools/source/svhtml/parhtml.cxx
+++ b/svtools/source/svhtml/parhtml.cxx
@@ -29,6 +29,7 @@
 #include <tools/datetime.hxx>
 #include <unotools/datetime.hxx>
 #include <svl/inettype.hxx>
+#include <svl/lngmisc.hxx>
 #include <com/sun/star/beans/PropertyAttribute.hpp>
 #include <com/sun/star/document/XDocumentProperties.hpp>
 
@@ -432,8 +433,12 @@ HtmlTokenId HTMLParser::ScanText( const sal_Unicode cBreak 
)
                     else
                         nNextCh = 0U;
 
-                    if ( ! rtl::isUnicodeCodePoint( cChar ) )
+                    if (!rtl::isUnicodeCodePoint(cChar)
+                        || (linguistic::IsControlChar(cChar)
+                            && cChar != '\r' && cChar != '\n' && cChar != 
'\t'))
+                    {
                         cChar = '?';
+                    }
                 }
                 else if( rtl::isAsciiAlpha( nNextCh ) )
                 {
@@ -721,8 +726,11 @@ HtmlTokenId HTMLParser::ScanText( const sal_Unicode cBreak 
)
             else
             {
                 do {
+                    if (!linguistic::IsControlChar(nNextCh))
+                    {
                     // All remaining characters make their way into the text.
-                    sTmpBuffer.appendUtf32( nNextCh );
+                        sTmpBuffer.appendUtf32( nNextCh );
+                    }
                     if( MAX_LEN == sTmpBuffer.getLength() )
                     {
                         aToken += sTmpBuffer.makeStringAndClear();
@@ -949,8 +957,11 @@ HtmlTokenId HTMLParser::GetNextRawToken()
             }
             SAL_FALLTHROUGH;
         default:
-            // all remaining characters are appended to the buffer
-            sTmpBuffer.appendUtf32( nNextCh );
+            if (!linguistic::IsControlChar(nNextCh) || nNextCh == '\t')
+            {
+                // all remaining characters are appended to the buffer
+                sTmpBuffer.appendUtf32( nNextCh );
+            }
             break;
         }
 

Reply via email to