external/python3/0001-3.6-closes-bpo-42938-Replace-snprintf-with-Python-un.patch.1 | 175 ++++++++++ external/python3/UnpackedTarball_python3.mk | 1 shell/source/win32/SysShExec.cxx | 32 + svtools/source/svhtml/parhtml.cxx | 19 - sw/qa/core/data/ww5/pass/ofz18526-1.doc |binary sw/source/core/unocore/unocrsrhelper.cxx | 12 sw/source/filter/ww8/ww8par.cxx | 56 ++- sw/source/filter/ww8/ww8par.hxx | 2 sw/source/filter/ww8/ww8par5.cxx | 37 +- writerfilter/source/rtftok/rtfdocumentimpl.cxx | 25 + xmlsecurity/source/xmlsec/mscrypt/securityenvironment_mscryptimpl.cxx | 4 11 files changed, 329 insertions(+), 34 deletions(-)
New commits: commit 1ffc3deeba649b04186f9fed61d19afd8d8affbc Author: Caolán McNamara <caol...@redhat.com> AuthorDate: Mon Feb 8 17:05:28 2021 +0000 Commit: Gabor Kelemen <kelemen.gab...@nisz.hu> CommitDate: Tue Oct 19 16:16:57 2021 +0200 default to CertificateValidity::INVALID so if CertGetCertificateChain fails we don't want validity to be css::security::CertificateValidity::VALID which is what the old default of 0 equates to notably commit 1e0bc66d16aee28ce8bd9582ea32178c63841902 Date: Thu Nov 5 16:55:26 2009 +0100 jl137: #103420# better logging turned the nss equivalent of SecurityEnvironment_NssImpl::verifyCertificate from 0 to CertificateValidity::INVALID like this change does Change-Id: I5350dbc22d1b9b378da2976d3b0abd728f1f4c27 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/110561 Tested-by: Jenkins Reviewed-by: Miklos Vajna <vmik...@collabora.com> (cherry picked from commit edeb164c1d8ab64116afee4e2140403a362a1358) Reviewed-on: https://gerrit.libreoffice.org/c/core/+/113090 Tested-by: Michael Stahl <michael.st...@allotropia.de> Reviewed-by: Michael Stahl <michael.st...@allotropia.de> diff --git a/xmlsecurity/source/xmlsec/mscrypt/securityenvironment_mscryptimpl.cxx b/xmlsecurity/source/xmlsec/mscrypt/securityenvironment_mscryptimpl.cxx index 55b732987d0f..75758d8f9ca3 100644 --- a/xmlsecurity/source/xmlsec/mscrypt/securityenvironment_mscryptimpl.cxx +++ b/xmlsecurity/source/xmlsec/mscrypt/securityenvironment_mscryptimpl.cxx @@ -800,7 +800,7 @@ sal_Int32 SecurityEnvironment_MSCryptImpl::verifyCertificate( const Reference< css::security::XCertificate >& aCert, const Sequence< Reference< css::security::XCertificate > >& seqCerts) { - sal_Int32 validity = 0; + sal_Int32 validity = css::security::CertificateValidity::INVALID; PCCERT_CHAIN_CONTEXT pChainContext = nullptr; PCCERT_CONTEXT pCertContext = nullptr; @@ -945,7 +945,7 @@ sal_Int32 SecurityEnvironment_MSCryptImpl::verifyCertificate( } else { - SAL_INFO("xmlsecurity.xmlsec", "CertGetCertificateChaine 
failed."); + SAL_INFO("xmlsecurity.xmlsec", "CertGetCertificateChain failed."); } } commit d10a2750e0b36041beb5f623388ccaaf47a4591d Author: Stephan Bergmann <sberg...@redhat.com> AuthorDate: Tue Feb 16 09:30:09 2021 +0100 Commit: Gabor Kelemen <kelemen.gab...@nisz.hu> CommitDate: Tue Oct 19 16:16:51 2021 +0200 Improve checkExtension Reviewed-on: https://gerrit.libreoffice.org/c/core/+/110970 Reviewed-by: Mike Kaganski <mike.kagan...@collabora.com> Tested-by: Jenkins (cherry picked from commit f456c4dacf700e064e112ef068ff7edb04239754) Reviewed-on: https://gerrit.libreoffice.org/c/core/+/110922 Reviewed-by: Michael Stahl <michael.st...@allotropia.de> (cherry picked from commit f19d95986756412e5d72047656eec17a720c5e57) Reviewed-on: https://gerrit.libreoffice.org/c/core/+/113088 Tested-by: Michael Stahl <michael.st...@allotropia.de> Change-Id: Iff416a9c5930ad5903f7ee51a2abbc94d5f40800 diff --git a/shell/source/win32/SysShExec.cxx b/shell/source/win32/SysShExec.cxx index a9e5a2c4ec7d..e90b68474486 100644 --- a/shell/source/win32/SysShExec.cxx +++ b/shell/source/win32/SysShExec.cxx @@ -377,21 +377,29 @@ void SAL_CALL CSysShExec::execute( const OUString& aCommand, const OUString& aPa {}, 0); } } + pathname = o3tl::toU(path); + // ShellExecuteExW appears to ignore trailing dots, so remove them: + while (pathname.endsWith(".", &pathname)) {} auto const n = pathname.lastIndexOf('.'); if (n > pathname.lastIndexOf('\\')) { auto const ext = pathname.copy(n + 1); - OUString env; - if (osl_getEnvironment(OUString("PATHEXT").pData, &env.pData) != osl_Process_E_None) - { - SAL_INFO("shell", "osl_getEnvironment(PATHEXT) failed"); - } - if (!(checkExtension(ext, env) - && checkExtension( - ext, - ".COM;.EXE;.BAT;.CMD;.VBS;.VBE;.JS;.JSE;.WSF;.WSH;.MSC;.PY;.CLASS;.JAR"))) - { - throw css::lang::IllegalArgumentException( - "XSystemShellExecute.execute, cannot process <" + aCommand + ">", {}, 0); + if (!ext.isEmpty()) { + OUString env; + if (osl_getEnvironment(OUString("PATHEXT").pData, 
&env.pData) + != osl_Process_E_None) + { + SAL_INFO("shell", "osl_getEnvironment(PATHEXT) failed"); + } + if (!(checkExtension(ext, env) + && checkExtension( + ext, + ".COM;.EXE;.BAT;.CMD;.VBS;.VBE;.JS;.JSE;.WSF;.WSH;.MSC;.PY;.CLASS;" + ".JAR;.APPLICATION;.LNK;.SCR"))) + { + throw css::lang::IllegalArgumentException( + "XSystemShellExecute.execute, cannot process <" + aCommand + ">", {}, + 0); + } } } } commit 6955dacbdacf479aa7dfa5f1f920963c47d96eeb Author: Michael Stahl <michael.st...@allotropia.de> AuthorDate: Wed Feb 17 12:24:08 2021 +0100 Commit: Gabor Kelemen <kelemen.gab...@nisz.hu> CommitDate: Tue Oct 19 14:23:22 2021 +0200 python3: add patch for CVE-2021-3177 Looks like Python 3.5 is EOL, so backport the patch. Change-Id: I9ba397b3ed7e5f4ee4f78b144d822ce260ca9fb4 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/111059 Tested-by: Michael Stahl <michael.st...@allotropia.de> Reviewed-by: Michael Stahl <michael.st...@allotropia.de> diff --git a/external/python3/0001-3.6-closes-bpo-42938-Replace-snprintf-with-Python-un.patch.1 b/external/python3/0001-3.6-closes-bpo-42938-Replace-snprintf-with-Python-un.patch.1 new file mode 100644 index 000000000000..fdcc5cb65267 --- /dev/null +++ b/external/python3/0001-3.6-closes-bpo-42938-Replace-snprintf-with-Python-un.patch.1 @@ -0,0 +1,175 @@ +From 34df10a9a16b38d54421eeeaf73ec89828563be7 Mon Sep 17 00:00:00 2001 +From: Benjamin Peterson <benja...@python.org> +Date: Mon, 18 Jan 2021 15:11:46 -0600 +Subject: [PATCH] [3.6] closes bpo-42938: Replace snprintf with Python unicode + formatting in ctypes param reprs. 
(GH-24250) + +(cherry picked from commit 916610ef90a0d0761f08747f7b0905541f0977c7) + +Co-authored-by: Benjamin Peterson <benja...@python.org> +--- + Lib/ctypes/test/test_parameters.py | 43 +++++++++++++++ + .../2021-01-18-09-27-31.bpo-42938.4Zn4Mp.rst | 2 + + Modules/_ctypes/callproc.c | 55 +++++++------------ + 3 files changed, 66 insertions(+), 34 deletions(-) + create mode 100644 Misc/NEWS.d/next/Security/2021-01-18-09-27-31.bpo-42938.4Zn4Mp.rst + +diff --git a/Lib/ctypes/test/test_parameters.py b/Lib/ctypes/test/test_parameters.py +index e4c25fd880..531894fdec 100644 +--- a/Lib/ctypes/test/test_parameters.py ++++ b/Lib/ctypes/test/test_parameters.py +@@ -201,6 +201,49 @@ def __dict__(self): + self.assertRaises(ArgumentError, func, 99) + + ++ def test_parameter_repr(self): ++ from ctypes import ( ++ c_bool, ++ c_char, ++ c_wchar, ++ c_byte, ++ c_ubyte, ++ c_short, ++ c_ushort, ++ c_int, ++ c_uint, ++ c_long, ++ c_ulong, ++ c_longlong, ++ c_ulonglong, ++ c_float, ++ c_double, ++ c_longdouble, ++ c_char_p, ++ c_wchar_p, ++ c_void_p, ++ ) ++ self.assertRegex(repr(c_bool.from_param(True)), r"^<cparam '\?' 
at 0x[A-Fa-f0-9]+>$") ++ self.assertEqual(repr(c_char.from_param(97)), "<cparam 'c' ('a')>") ++ self.assertRegex(repr(c_wchar.from_param('a')), r"^<cparam 'u' at 0x[A-Fa-f0-9]+>$") ++ self.assertEqual(repr(c_byte.from_param(98)), "<cparam 'b' (98)>") ++ self.assertEqual(repr(c_ubyte.from_param(98)), "<cparam 'B' (98)>") ++ self.assertEqual(repr(c_short.from_param(511)), "<cparam 'h' (511)>") ++ self.assertEqual(repr(c_ushort.from_param(511)), "<cparam 'H' (511)>") ++ self.assertRegex(repr(c_int.from_param(20000)), r"^<cparam '[li]' \(20000\)>$") ++ self.assertRegex(repr(c_uint.from_param(20000)), r"^<cparam '[LI]' \(20000\)>$") ++ self.assertRegex(repr(c_long.from_param(20000)), r"^<cparam '[li]' \(20000\)>$") ++ self.assertRegex(repr(c_ulong.from_param(20000)), r"^<cparam '[LI]' \(20000\)>$") ++ self.assertRegex(repr(c_longlong.from_param(20000)), r"^<cparam '[liq]' \(20000\)>$") ++ self.assertRegex(repr(c_ulonglong.from_param(20000)), r"^<cparam '[LIQ]' \(20000\)>$") ++ self.assertEqual(repr(c_float.from_param(1.5)), "<cparam 'f' (1.5)>") ++ self.assertEqual(repr(c_double.from_param(1.5)), "<cparam 'd' (1.5)>") ++ self.assertEqual(repr(c_double.from_param(1e300)), "<cparam 'd' (1e+300)>") ++ self.assertRegex(repr(c_longdouble.from_param(1.5)), r"^<cparam ('d' \(1.5\)|'g' at 0x[A-Fa-f0-9]+)>$") ++ self.assertRegex(repr(c_char_p.from_param(b'hihi')), "^<cparam 'z' \(0x[A-Fa-f0-9]+\)>$") ++ self.assertRegex(repr(c_wchar_p.from_param('hihi')), "^<cparam 'Z' \(0x[A-Fa-f0-9]+\)>$") ++ self.assertRegex(repr(c_void_p.from_param(0x12)), r"^<cparam 'P' \(0x0*12\)>$") ++ + ################################################################ + + if __name__ == '__main__': +diff --git a/Misc/NEWS.d/next/Security/2021-01-18-09-27-31.bpo-42938.4Zn4Mp.rst b/Misc/NEWS.d/next/Security/2021-01-18-09-27-31.bpo-42938.4Zn4Mp.rst +new file mode 100644 +index 0000000000..7df65a156f +--- /dev/null ++++ b/Misc/NEWS.d/next/Security/2021-01-18-09-27-31.bpo-42938.4Zn4Mp.rst +@@ -0,0 +1,2 @@ 
++Avoid static buffers when computing the repr of :class:`ctypes.c_double` and ++:class:`ctypes.c_longdouble` values. +diff --git a/Modules/_ctypes/callproc.c b/Modules/_ctypes/callproc.c +index 70e416b950..9fcf95f543 100644 +--- a/Modules/_ctypes/callproc.c ++++ b/Modules/_ctypes/callproc.c +@@ -451,54 +451,43 @@ PyCArg_dealloc(PyCArgObject *self) + static PyObject * + PyCArg_repr(PyCArgObject *self) + { +- char buffer[256]; + switch(self->tag) { + case 'b': + case 'B': +- sprintf(buffer, "<cparam '%c' (%d)>", ++ return PyUnicode_FromFormat("<cparam '%c' (%d)>", + self->tag, self->value.b); +- break; + case 'h': + case 'H': +- sprintf(buffer, "<cparam '%c' (%d)>", ++ return PyUnicode_FromFormat("<cparam '%c' (%d)>", + self->tag, self->value.h); +- break; + case 'i': + case 'I': +- sprintf(buffer, "<cparam '%c' (%d)>", ++ return PyUnicode_FromFormat("<cparam '%c' (%d)>", + self->tag, self->value.i); +- break; + case 'l': + case 'L': +- sprintf(buffer, "<cparam '%c' (%ld)>", ++ return PyUnicode_FromFormat("<cparam '%c' (%ld)>", + self->tag, self->value.l); +- break; + + #ifdef HAVE_LONG_LONG + case 'q': + case 'Q': +- sprintf(buffer, +-#ifdef MS_WIN32 +- "<cparam '%c' (%I64d)>", +-#else +- "<cparam '%c' (%qd)>", +-#endif ++ return PyUnicode_FromFormat("<cparam '%c' (%lld)>", + self->tag, self->value.q); +- break; + #endif + case 'd': +- sprintf(buffer, "<cparam '%c' (%f)>", +- self->tag, self->value.d); +- break; +- case 'f': +- sprintf(buffer, "<cparam '%c' (%f)>", +- self->tag, self->value.f); +- break; +- ++ case 'f': { ++ PyObject *f = PyFloat_FromDouble((self->tag == 'f') ? 
self->value.f : self->value.d); ++ if (f == NULL) { ++ return NULL; ++ } ++ { PyObject *result = PyUnicode_FromFormat("<cparam '%c' (%R)>", self->tag, f); ++ Py_DECREF(f); ++ return result; } ++ } + case 'c': +- sprintf(buffer, "<cparam '%c' (%c)>", ++ return PyUnicode_FromFormat("<cparam '%c' ('%c')>", + self->tag, self->value.c); +- break; + + /* Hm, are these 'z' and 'Z' codes useful at all? + Shouldn't they be replaced by the functionality of c_string +@@ -507,16 +495,14 @@ PyCArg_repr(PyCArgObject *self) + case 'z': + case 'Z': + case 'P': +- sprintf(buffer, "<cparam '%c' (%p)>", ++ return PyUnicode_FromFormat("<cparam '%c' (%p)>", + self->tag, self->value.p); + break; + + default: +- sprintf(buffer, "<cparam '%c' at %p>", +- self->tag, self); +- break; ++ return PyUnicode_FromFormat("<cparam '%c' at %p>", ++ (unsigned char)self->tag, (void *)self); + } +- return PyUnicode_FromString(buffer); + } + + static PyMemberDef PyCArgType_members[] = { +-- +2.29.2 + diff --git a/external/python3/UnpackedTarball_python3.mk b/external/python3/UnpackedTarball_python3.mk index ee99de1f5e0c..81a392f76f48 100644 --- a/external/python3/UnpackedTarball_python3.mk +++ b/external/python3/UnpackedTarball_python3.mk @@ -26,6 +26,7 @@ $(eval $(call gb_UnpackedTarball_add_patches,python3,\ external/python3/ubsan.patch.0 \ external/python3/python-3.5.tweak.strip.soabi.patch \ external/python3/0001-3.6-bpo-17239-Disable-external-entities-in-SAX-parse.patch.1 \ + external/python3/0001-3.6-closes-bpo-42938-Replace-snprintf-with-Python-un.patch.1 \ )) ifneq ($(filter DRAGONFLY FREEBSD LINUX NETBSD OPENBSD SOLARIS,$(OS)),) commit 586d2c59df0ee87dccdc237e99691f7d5b12c393 Author: Michael Stahl <michael.st...@cib.de> AuthorDate: Wed Oct 30 16:06:02 2019 +0100 Commit: Gabor Kelemen <kelemen.gab...@nisz.hu> CommitDate: Tue Oct 19 13:57:07 2021 +0200 sw: WW8 import: filter control characters in GetFieldResult() Triggers the assert in SwSubFont::GetTextSize_() on ooo58234-1.doc, which has a 
field result with ^G cell separators that is converted to SwInputField, which inserts the field result into SwTextNode. Change-Id: Ibdb93390862a11462d62cf744bac912d6009777e Reviewed-on: https://gerrit.libreoffice.org/81788 Tested-by: Jenkins Reviewed-by: Michael Stahl <michael.st...@cib.de> (cherry picked from commit 3a9d504b01c061f60a915b5681c8313859294118) diff --git a/sw/source/filter/ww8/ww8par5.cxx b/sw/source/filter/ww8/ww8par5.cxx index d4ced6eec07b..d2ab475b3630 100644 --- a/sw/source/filter/ww8/ww8par5.cxx +++ b/sw/source/filter/ww8/ww8par5.cxx @@ -32,6 +32,7 @@ #include <com/sun/star/task/InteractionHandler.hpp> #include <com/sun/star/ucb/XCommandEnvironment.hpp> +#include <svl/lngmisc.hxx> #include <svl/urihelper.hxx> #include <svl/zforlist.hxx> #include <svl/zformat.hxx> @@ -1192,7 +1193,35 @@ OUString SwWW8ImplReader::GetFieldResult( WW8FieldDesc* pF ) m_pStrm->Seek( nOldPos ); //replace both CR 0x0D and VT 0x0B with LF 0x0A - return sRes.replace(0x0D, 0x0A).replace(0x0B, 0x0A); + // at least in the cases where the result is added to an SwInputField + // there must not be control characters in it + OUStringBuffer buf(sRes.getLength()); + for (sal_Int32 i = 0; i < sRes.getLength(); ++i) + { + sal_Unicode const ch(sRes[i]); + if (!linguistic::IsControlChar(ch)) + { + buf.append(ch); + } + else + { + switch (ch) + { + case 0x0B: + case '\r': + buf.append('\n'); + break; + case '\n': + case '\t': + buf.append(ch); + break; + default: + SAL_INFO("sw.ww8", "GetFieldResult(): filtering control character"); + break; + } + } + } + return buf.makeStringAndClear(); } /* commit c8ee4b2347ddce1427dc2999e7e13c97a0e917cb Author: Michael Stahl <michael.st...@cib.de> AuthorDate: Tue Nov 12 18:57:58 2019 +0100 Commit: Gabor Kelemen <kelemen.gab...@nisz.hu> CommitDate: Tue Oct 19 12:38:39 2021 +0200 ofz#18526 sw: WW8 import: don't insert control characters Sanitize string before calling InsertString(). 
This segfaults since: commit b522fc0646915d4da94df38dd249c88b28f25be7 Date: Tue Sep 24 18:11:45 2019 +0200 sw: maintain fieldmarks in DeleteRange()/DeleteAndJoin()/ReplaceRange() Reviewed-on: https://gerrit.libreoffice.org/81949 Tested-by: Jenkins Reviewed-by: Caolán McNamara <caol...@redhat.com> Tested-by: Caolán McNamara <caol...@redhat.com> (cherry picked from commit 7ecda38cdaa2361e8510bf3e7206863c4936deab) Reviewed-on: https://gerrit.libreoffice.org/82759 (cherry picked from commit d494a4c0ead7db481757d8d67fbce9e1b02e65df) Change-Id: I9ef73d924420686f6838fa21900ec57b4d25c905 diff --git a/sw/qa/core/data/ww5/pass/ofz18526-1.doc b/sw/qa/core/data/ww5/pass/ofz18526-1.doc new file mode 100644 index 000000000000..e651650f9a26 Binary files /dev/null and b/sw/qa/core/data/ww5/pass/ofz18526-1.doc differ diff --git a/sw/source/filter/ww8/ww8par.cxx b/sw/source/filter/ww8/ww8par.cxx index a4f0817b8216..ca9065f14acc 100644 --- a/sw/source/filter/ww8/ww8par.cxx +++ b/sw/source/filter/ww8/ww8par.cxx @@ -108,6 +108,8 @@ #include <com/sun/star/document/XDocumentPropertiesSupplier.hpp> #include <com/sun/star/document/XViewDataSupplier.hpp> #include <com/sun/star/document/IndexedPropertyValues.hpp> + +#include <svl/lngmisc.hxx> #include <svl/itemiter.hxx> #include <comphelper/processfactory.hxx> @@ -3346,14 +3348,38 @@ void SwWW8ImplReader::emulateMSWordAddTextToParagraph(const OUString& rAddString } } +namespace sw { + +auto FilterControlChars(OUString const& rString) -> OUString +{ + OUStringBuffer buf(rString.getLength()); + for (sal_Int32 i = 0; i < rString.getLength(); ++i) + { + sal_Unicode const ch(rString[i]); + if (!linguistic::IsControlChar(ch) || ch == '\r' || ch == '\n' || ch == '\t') + { + buf.append(ch); + } + else + { + SAL_INFO("sw.ww8", "filtering control character"); + } + } + return buf.makeStringAndClear(); +} + +} // namespace sw + void SwWW8ImplReader::simpleAddTextToParagraph(const OUString& rAddString) { - if (rAddString.isEmpty()) + OUString const 
addString(sw::FilterControlChars(rAddString)); + + if (addString.isEmpty()) return; #if OSL_DEBUG_LEVEL > 1 { - OString sText(OUStringToOString(rAddString, RTL_TEXTENCODING_UTF8)); + OString sText(OUStringToOString(addString, RTL_TEXTENCODING_UTF8)); SAL_INFO("sw.ww8", "<addTextToParagraph>" << sText.getStr() << "</addTextToParagraph>"); } #endif @@ -3369,21 +3395,21 @@ void SwWW8ImplReader::simpleAddTextToParagraph(const OUString& rAddString) const sal_Int32 nCharsLeft = SAL_MAX_INT32 - pNd->GetText().getLength(); if (nCharsLeft > 0) { - if (rAddString.getLength() <= nCharsLeft) + if (addString.getLength() <= nCharsLeft) { - m_rDoc.getIDocumentContentOperations().InsertString(*m_pPaM, rAddString); + m_rDoc.getIDocumentContentOperations().InsertString(*m_pPaM, addString); } else { - m_rDoc.getIDocumentContentOperations().InsertString(*m_pPaM, rAddString.copy(0, nCharsLeft)); + m_rDoc.getIDocumentContentOperations().InsertString(*m_pPaM, addString.copy(0, nCharsLeft)); AppendTextNode(*m_pPaM->GetPoint()); - m_rDoc.getIDocumentContentOperations().InsertString(*m_pPaM, rAddString.copy(nCharsLeft)); + m_rDoc.getIDocumentContentOperations().InsertString(*m_pPaM, addString.copy(nCharsLeft)); } } else { AppendTextNode(*m_pPaM->GetPoint()); - m_rDoc.getIDocumentContentOperations().InsertString(*m_pPaM, rAddString); + m_rDoc.getIDocumentContentOperations().InsertString(*m_pPaM, addString); } m_bReadTable = false; @@ -3409,13 +3435,17 @@ bool SwWW8ImplReader::ReadChars(WW8_CP& rPos, WW8_CP nNextAttr, long nTextEnd, nRequested = nMaxPossible; } - for (WW8_CP nCh = 0; nCh < nRequested; ++nCh) + if (!linguistic::IsControlChar(m_cSymbol) + || m_cSymbol == '\r' || m_cSymbol == '\n' || m_cSymbol == '\t') { - m_rDoc.getIDocumentContentOperations().InsertString( *m_pPaM, OUString(m_cSymbol) ); + for (WW8_CP nCh = 0; nCh < nRequested; ++nCh) + { + m_rDoc.getIDocumentContentOperations().InsertString(*m_pPaM, OUString(m_cSymbol)); + } + m_pCtrlStck->SetAttr(*m_pPaM->GetPoint(), 
RES_CHRATR_FONT); + m_pCtrlStck->SetAttr(*m_pPaM->GetPoint(), RES_CHRATR_CJK_FONT); + m_pCtrlStck->SetAttr(*m_pPaM->GetPoint(), RES_CHRATR_CTL_FONT); } - m_pCtrlStck->SetAttr( *m_pPaM->GetPoint(), RES_CHRATR_FONT ); - m_pCtrlStck->SetAttr( *m_pPaM->GetPoint(), RES_CHRATR_CJK_FONT ); - m_pCtrlStck->SetAttr( *m_pPaM->GetPoint(), RES_CHRATR_CTL_FONT ); } m_pStrm->SeekRel(nRequested); rPos = nEnd; // Ignore until attribute end diff --git a/sw/source/filter/ww8/ww8par.hxx b/sw/source/filter/ww8/ww8par.hxx index b0a20a038369..a2867d40177e 100644 --- a/sw/source/filter/ww8/ww8par.hxx +++ b/sw/source/filter/ww8/ww8par.hxx @@ -542,6 +542,8 @@ namespace sw sal_Int32 GetPtContent() { return mnPtContent; }; }; } + + auto FilterControlChars(OUString const& rString) -> OUString; } class WW8FieldEntry diff --git a/sw/source/filter/ww8/ww8par5.cxx b/sw/source/filter/ww8/ww8par5.cxx index 82efb06481fd..d4ced6eec07b 100644 --- a/sw/source/filter/ww8/ww8par5.cxx +++ b/sw/source/filter/ww8/ww8par5.cxx @@ -35,6 +35,7 @@ #include <svl/urihelper.hxx> #include <svl/zforlist.hxx> #include <svl/zformat.hxx> +#include <svl/lngmisc.hxx> #include <sfx2/linkmgr.hxx> #include <ucbhelper/content.hxx> @@ -1886,7 +1887,8 @@ eF_ResT SwWW8ImplReader::Read_F_Symbol( WW8FieldDesc*, OUString& rStr ) if( aQ.isEmpty() ) return eF_ResT::TAGIGN; // -> no 0-char in text - if (sal_Unicode cChar = static_cast<sal_Unicode>(aQ.toInt32())) + sal_Unicode const cChar = static_cast<sal_Unicode>(aQ.toInt32()); + if (!linguistic::IsControlChar(cChar) || cChar == '\r' || cChar == '\n' || cChar == '\t') { if (!aName.isEmpty()) // Font Name set ? 
{ @@ -2667,11 +2669,11 @@ void SwWW8ImplReader::Read_SubF_Ruby( WW8ReadFieldParams& rReadParam) if ((nBegin != -1) && (nEnd != -1) && (nBegin < nEnd)) { sText = sPart.copy(nBegin+1,nEnd-nBegin-1); + sText = sw::FilterControlChars(sText); } } } } - } break; } commit 219499f20068ee09361c23cb1b9e3fb8f14f8c4a Author: Michael Stahl <michael.st...@cib.de> AuthorDate: Thu Nov 14 17:37:17 2019 +0100 Commit: Gabor Kelemen <kelemen.gab...@nisz.hu> CommitDate: Tue Oct 19 11:57:43 2021 +0200 sw: WW8 import: instead of control character insert '?' for footnote SwWW8ImplReader::ReadChar() inserts a U+0002 control character to temporarily mark a footnote anchor; this is then deleted and replaced with a real footnote hint by SwWW8ImplReader::End_Footnote(). The assumption is that it is necessary to insert a placeholder character to be able to apply formatting to it. But if the document is corrupted, the control character could survive the import, which sounds less than ideal. So either make this magic character more explicit by documenting it in hintids.hxx and removing any outstanding ones at the end of the import, or use a non-offensive character instead; since this should only affect invalid documents, choose the solution with the least effort. 
Change-Id: I76d396258b32e0f0fb6393942a58a4dc57912211 Reviewed-on: https://gerrit.libreoffice.org/82760 Tested-by: Jenkins Reviewed-by: Caolán McNamara <caol...@redhat.com> Tested-by: Caolán McNamara <caol...@redhat.com> (cherry picked from commit 13ba765c444713b0b0b2f4b4231bdafcbbef6ad0) diff --git a/sw/source/filter/ww8/ww8par.cxx b/sw/source/filter/ww8/ww8par.cxx index cd9ba231d788..a4f0817b8216 100644 --- a/sw/source/filter/ww8/ww8par.cxx +++ b/sw/source/filter/ww8/ww8par.cxx @@ -3685,7 +3685,7 @@ bool SwWW8ImplReader::ReadChar(long nPosCp, long nCpOfs) break; case 0x2: // TODO: Auto-Footnote-Number, should be replaced by SwWW8ImplReader::End_Footnote later if (!m_aFootnoteStack.empty()) - cInsert = 0x2; + cInsert = '?'; break; default: SAL_INFO( "sw.ww8.level2", "<unknownValue val=\"" << nWCharVal << "\">" ); commit e6b46b8281884206e878348224accffb4b145701 Author: Michael Stahl <michael.st...@cib.de> AuthorDate: Tue Oct 29 15:54:41 2019 +0100 Commit: Gabor Kelemen <kelemen.gab...@nisz.hu> CommitDate: Tue Oct 19 11:48:37 2021 +0200 writerfilter: rtftok: filter control characters ... in RTFDocumentImpl::checkUnicode(); see ooo86460-1.xls [sic] for an example. There is another caller of text() in rtfdispatchdestination.cxx:311 but it turns out that buffered text was created by text() in the first place. This shouldn't be a problem for DOCX because XML 1.0 doesn't allow the bad control characters anyway so the sax parser should report an error in that case. 
Reviewed-on: https://gerrit.libreoffice.org/81697 Reviewed-by: Michael Stahl <michael.st...@cib.de> Tested-by: Michael Stahl <michael.st...@cib.de> (cherry picked from commit a6516c76c01b92f7d35bfb352b63af7de42b5707) Change-Id: Ice45e1c3c8c7db668a4cfb8364e42addea1777ce diff --git a/writerfilter/source/rtftok/rtfdocumentimpl.cxx b/writerfilter/source/rtftok/rtfdocumentimpl.cxx index af8ef382b53a..dc25bc834980 100644 --- a/writerfilter/source/rtftok/rtfdocumentimpl.cxx +++ b/writerfilter/source/rtftok/rtfdocumentimpl.cxx @@ -34,6 +34,7 @@ #include <comphelper/embeddedobjectcontainer.hxx> #include <comphelper/sequenceashashmap.hxx> #include <comphelper/sequence.hxx> +#include <svl/lngmisc.hxx> #include <sfx2/sfxbasemodel.hxx> #include <sfx2/classificationhelper.hxx> #include <sfx2/sfx.hrc> @@ -3191,16 +3192,40 @@ void RTFDocumentImpl::setSkipUnknown(bool bSkipUnknown) m_bSkipUnknown = bSkipUnknown; } +static auto FilterControlChars(Destination const destination, OUString const& rString) -> OUString +{ + if (destination == Destination::LEVELNUMBERS || destination == Destination::LEVELTEXT) + { // control characters are magic here! 
+ return rString; + } + OUStringBuffer buf(rString.getLength()); + for (sal_Int32 i = 0; i < rString.getLength(); ++i) + { + sal_Unicode const ch(rString[i]); + if (!linguistic::IsControlChar(ch) || ch == '\r' || ch == '\n' || ch == '\t') + { + buf.append(ch); + } + else + { + SAL_INFO("writerfilter.rtf", "filtering control character"); + } + } + return buf.makeStringAndClear(); +} + void RTFDocumentImpl::checkUnicode(bool bUnicode, bool bHex) { if (bUnicode && !m_aUnicodeBuffer.isEmpty()) { OUString aString = m_aUnicodeBuffer.makeStringAndClear(); + aString = FilterControlChars(m_aStates.top().eDestination, aString); text(aString); } if (bHex && !m_aHexBuffer.isEmpty()) { OUString aString = OStringToOUString(m_aHexBuffer.makeStringAndClear(), m_aStates.top().nCurrentEncoding); + aString = FilterControlChars(m_aStates.top().eDestination, aString); text(aString); } } commit ef5e893b57e9ca98236c9176438db61c90107a3d Author: Michael Stahl <michael.st...@cib.de> AuthorDate: Tue Oct 29 15:52:34 2019 +0100 Commit: Gabor Kelemen <kelemen.gab...@nisz.hu> CommitDate: Tue Oct 19 11:03:48 2021 +0200 sw: UNO API: do not allow inserting control characters into nodes Refuse invalid input in DocInsertStringSplitCR(). 
Reviewed-on: https://gerrit.libreoffice.org/81696 Tested-by: Jenkins Reviewed-by: Michael Stahl <michael.st...@cib.de> (cherry picked from commit 9b1e3e9bfdc0639630a367e45e4bdc2e9f22e503) Change-Id: I097c1b3a1f70b0cf1fa3fc33fc1d965ee6c96280 diff --git a/sw/source/core/unocore/unocrsrhelper.cxx b/sw/source/core/unocore/unocrsrhelper.cxx index 71faf3dbb0bd..547598423ab7 100644 --- a/sw/source/core/unocore/unocrsrhelper.cxx +++ b/sw/source/core/unocore/unocrsrhelper.cxx @@ -65,6 +65,7 @@ #include <cntfrm.hxx> #include <pagefrm.hxx> #include <svl/eitem.hxx> +#include <svl/lngmisc.hxx> #include <docary.hxx> #include <swtable.hxx> #include <tox.hxx> @@ -1095,6 +1096,17 @@ bool DocInsertStringSplitCR( { bool bOK = true; + for (sal_Int32 i = 0; i < rText.getLength(); ++i) + { + sal_Unicode const ch(rText[i]); + if (linguistic::IsControlChar(ch) + && ch != '\r' && ch != '\n' && ch != '\t') + { + SAL_WARN("sw.uno", "DocInsertStringSplitCR: refusing to insert control character " << int(ch)); + return false; + } + } + const SwInsertFlags nInsertFlags = bForceExpandHints ? ( SwInsertFlags::FORCEHINTEXPAND | SwInsertFlags::EMPTYEXPAND) commit 2e2efeb822262ffdbd9f85790b7389453c615fd2 Author: Michael Stahl <michael.st...@cib.de> AuthorDate: Mon Oct 28 14:31:23 2019 +0100 Commit: Gabor Kelemen <kelemen.gab...@nisz.hu> CommitDate: Tue Oct 19 10:43:56 2021 +0200 svl: HTMLParser: stop inserting control character garbage into Writer E.g. rhbz433940-1.html contains literal ^G characters that are inserted as-is into SwTextNodes. This now triggers assert about CH_TXT_ATR_FIELDSTART in SwSubFont::GetTextSize_() that was added in 19a559b0ec9b806519c405651d6d2b2e14712b4a. 
Change-Id: I6aa7de41a04069e15b40865fd57894dae0fc10db Reviewed-on: https://gerrit.libreoffice.org/81606 Reviewed-by: Michael Stahl <michael.st...@cib.de> Tested-by: Michael Stahl <michael.st...@cib.de> (cherry picked from commit 35d248cab1f0d4800f72abb5cb6afb56f40d9083) diff --git a/svtools/source/svhtml/parhtml.cxx b/svtools/source/svhtml/parhtml.cxx index 5f8ef05318c2..53f3626d74a3 100644 --- a/svtools/source/svhtml/parhtml.cxx +++ b/svtools/source/svhtml/parhtml.cxx @@ -29,6 +29,7 @@ #include <tools/datetime.hxx> #include <unotools/datetime.hxx> #include <svl/inettype.hxx> +#include <svl/lngmisc.hxx> #include <com/sun/star/beans/PropertyAttribute.hpp> #include <com/sun/star/document/XDocumentProperties.hpp> @@ -432,8 +433,12 @@ HtmlTokenId HTMLParser::ScanText( const sal_Unicode cBreak ) else nNextCh = 0U; - if ( ! rtl::isUnicodeCodePoint( cChar ) ) + if (!rtl::isUnicodeCodePoint(cChar) + || (linguistic::IsControlChar(cChar) + && cChar != '\r' && cChar != '\n' && cChar != '\t')) + { cChar = '?'; + } } else if( rtl::isAsciiAlpha( nNextCh ) ) { @@ -721,8 +726,11 @@ HtmlTokenId HTMLParser::ScanText( const sal_Unicode cBreak ) else { do { + if (!linguistic::IsControlChar(nNextCh)) + { // All remaining characters make their way into the text. - sTmpBuffer.appendUtf32( nNextCh ); + sTmpBuffer.appendUtf32( nNextCh ); + } if( MAX_LEN == sTmpBuffer.getLength() ) { aToken += sTmpBuffer.makeStringAndClear(); @@ -949,8 +957,11 @@ HtmlTokenId HTMLParser::GetNextRawToken() } SAL_FALLTHROUGH; default: - // all remaining characters are appended to the buffer - sTmpBuffer.appendUtf32( nNextCh ); + if (!linguistic::IsControlChar(nNextCh) || nNextCh == '\t') + { + // all remaining characters are appended to the buffer + sTmpBuffer.appendUtf32( nNextCh ); + } break; }