writerfilter/CppunitTest_writerfilter_rtftok.mk | 1 writerfilter/qa/cppunittests/rtftok/data/invalid-hex.rtf | 3 writerfilter/qa/cppunittests/rtftok/rtftokenizer.cxx | 63 +++++++++++++++ writerfilter/source/rtftok/rtftokenizer.cxx | 15 ++- 4 files changed, 77 insertions(+), 5 deletions(-)
New commits: commit 6fc8a6b0b52509d735971f079d7b1660559d475d Author: Miklos Vajna <vmik...@collabora.com> AuthorDate: Mon May 31 21:12:12 2021 +0200 Commit: Miklos Vajna <vmik...@collabora.com> CommitDate: Tue Jun 1 08:46:10 2021 +0200 tdf#142325 RTF import: tolerate invalid hex markup like "\'3?" The RTF spec says \'hh is the expected form, where both "h" are 0-9, a-f or A-F. But Word accepts the bugdoc, so don't reject this input, handle \'<number><junk> as \'0<number>. At least the current case ignores the actual value, as it's a single character to provide a non-unicode value after \uN for old readers that don't support Unicode. Change-Id: Ib61247ab08278ca5012cc887cee26c7571c29fc6 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/116499 Tested-by: Jenkins Reviewed-by: Miklos Vajna <vmik...@collabora.com> diff --git a/writerfilter/CppunitTest_writerfilter_rtftok.mk b/writerfilter/CppunitTest_writerfilter_rtftok.mk index db038292ebdd..07271b777ae0 100644 --- a/writerfilter/CppunitTest_writerfilter_rtftok.mk +++ b/writerfilter/CppunitTest_writerfilter_rtftok.mk @@ -18,6 +18,7 @@ $(eval $(call gb_CppunitTest_use_externals,writerfilter_rtftok,\ $(eval $(call gb_CppunitTest_add_exception_objects,writerfilter_rtftok, \ writerfilter/qa/cppunittests/rtftok/rtfsdrimport \ writerfilter/qa/cppunittests/rtftok/rtfsprm \ + writerfilter/qa/cppunittests/rtftok/rtftokenizer \ )) $(eval $(call gb_CppunitTest_use_libraries,writerfilter_rtftok, \ diff --git a/writerfilter/qa/cppunittests/rtftok/data/invalid-hex.rtf b/writerfilter/qa/cppunittests/rtftok/data/invalid-hex.rtf new file mode 100644 index 000000000000..8f9224c0e905 --- /dev/null +++ b/writerfilter/qa/cppunittests/rtftok/data/invalid-hex.rtf @@ -0,0 +1,3 @@ +{\rtf1 +x\u345\'3?x +\par} diff --git a/writerfilter/qa/cppunittests/rtftok/rtftokenizer.cxx b/writerfilter/qa/cppunittests/rtftok/rtftokenizer.cxx new file mode 100644 index 000000000000..530e9bb7245a --- /dev/null +++ b/writerfilter/qa/cppunittests/rtftok/rtftokenizer.cxx @@ -0,0 +1,63 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#include <test/bootstrapfixture.hxx> +#include <unotest/macros_test.hxx> + +#include <com/sun/star/frame/Desktop.hpp> +#include <com/sun/star/text/XTextDocument.hpp> + +using namespace ::com::sun::star; + +namespace +{ +/// Tests for writerfilter/source/rtftok/rtftokenizer.cxx. +class Test : public test::BootstrapFixture, public unotest::MacrosTest +{ +private: + uno::Reference<lang::XComponent> mxComponent; + +public: + void setUp() override; + void tearDown() override; + uno::Reference<lang::XComponent>& getComponent() { return mxComponent; } +}; + +void Test::setUp() +{ + test::BootstrapFixture::setUp(); + + mxDesktop.set(frame::Desktop::create(mxComponentContext)); +} + +void Test::tearDown() +{ + if (mxComponent.is()) + mxComponent->dispose(); + + test::BootstrapFixture::tearDown(); +} + +constexpr OUStringLiteral DATA_DIRECTORY = u"/writerfilter/qa/cppunittests/rtftok/data/"; + +CPPUNIT_TEST_FIXTURE(Test, testInvalidHex) +{ + // Given a document with a markup like "\'3?": + OUString aURL = m_directories.getURLFromSrc(DATA_DIRECTORY) + "invalid-hex.rtf"; + + // When load that document: + getComponent() = loadFromDesktop(aURL); + + // Then make sure the result matches Word, rather than just refusing to import the document: + uno::Reference<text::XTextDocument> xTextDocument(getComponent(), uno::UNO_QUERY); + CPPUNIT_ASSERT_EQUAL(OUString::fromUtf8("xřx"), xTextDocument->getText()->getString()); +} +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/writerfilter/source/rtftok/rtftokenizer.cxx b/writerfilter/source/rtftok/rtftokenizer.cxx index a04f57a632e6..d702b8dbd9c9 100644 --- a/writerfilter/source/rtftok/rtftokenizer.cxx +++ b/writerfilter/source/rtftok/rtftokenizer.cxx @@ -144,11 +144,16 @@ RTFError RTFTokenizer::resolveParse() else { SAL_INFO("writerfilter.rtf", __func__ << ": hex internal state"); - b = b << 4; - sal_Int8 parsed = msfilter::rtfutil::AsHex(ch); - if (parsed == -1) - return RTFError::HEX_INVALID; - b += parsed; + // Assume that \'<number><junk> means \'0<number>. + if (rtl::isAsciiDigit(static_cast<unsigned char>(ch)) + || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F')) + { + b = b << 4; + sal_Int8 parsed = msfilter::rtfutil::AsHex(ch); + if (parsed == -1) + return RTFError::HEX_INVALID; + b += parsed; + } count--; if (!count) { _______________________________________________ Libreoffice-commits mailing list libreoffice-comm...@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/libreoffice-commits