svtools/source/svrtf/svparser.cxx | 20 ++++++++++++++++++++ sw/qa/extras/htmlimport/data/emojis16BE.html |binary sw/qa/extras/htmlimport/htmlimport.cxx | 8 ++++++++ 3 files changed, 28 insertions(+)
New commits: commit 21154ea8c450f9f5568b32123d34a20e498a9290 Author: Mike Kaganski <mike.kagan...@collabora.com> AuthorDate: Sat Dec 11 12:53:26 2021 +0300 Commit: Mike Kaganski <mike.kagan...@collabora.com> CommitDate: Sat Dec 11 12:21:57 2021 +0100 tdf#146173: combine non-BMP characters' surrogates correctly Change-Id: Ib3af1f9e461f133d2f5b09b9db4fb87c1ede0b9f Reviewed-on: https://gerrit.libreoffice.org/c/core/+/126658 Tested-by: Jenkins Reviewed-by: Mike Kaganski <mike.kagan...@collabora.com> diff --git a/svtools/source/svrtf/svparser.cxx b/svtools/source/svrtf/svparser.cxx index 0fec7a97097e..dd5068976ff7 100644 --- a/svtools/source/svrtf/svparser.cxx +++ b/svtools/source/svrtf/svparser.cxx @@ -247,7 +247,27 @@ sal_uInt32 SvParser<T>::GetNextChar() rInput.ReadUtf16(cUC); bErr = !rInput.good(); if( !bErr ) + { c = cUC; + if (rtl::isHighSurrogate(cUC)) + { + const sal_uInt64 nPos = rInput.Tell(); + rInput.ReadUtf16(cUC); + bErr = !rInput.good(); + if (!bErr) + { + if (rtl::isLowSurrogate(cUC)) + c = rtl::combineSurrogates(c, cUC); + else + rInput.Seek(nPos); // process lone high surrogate + } + else + { + bErr = false; // process lone high surrogate + rInput.Seek(nPos); // maybe step 1 byte back + } + } + } } else { diff --git a/sw/qa/extras/htmlimport/data/emojis16BE.html b/sw/qa/extras/htmlimport/data/emojis16BE.html new file mode 100644 index 000000000000..023aee1cb20e Binary files /dev/null and b/sw/qa/extras/htmlimport/data/emojis16BE.html differ diff --git a/sw/qa/extras/htmlimport/htmlimport.cxx b/sw/qa/extras/htmlimport/htmlimport.cxx index a8b9a4df65a4..00e2ec99191f 100644 --- a/sw/qa/extras/htmlimport/htmlimport.cxx +++ b/sw/qa/extras/htmlimport/htmlimport.cxx @@ -528,6 +528,14 @@ CPPUNIT_TEST_FIXTURE(SwModelTestBase, testOleImgSvg) CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), xObjects->getCount()); } +CPPUNIT_TEST_FIXTURE(HtmlImportTest, testUTF16_nonBMP) +{ + load(mpTestDocumentPath, "emojis16BE.html"); + // tdf#146173: non-BMP characters' 
surrogates didn't combine correctly + CPPUNIT_ASSERT_EQUAL(OUString(u"a text with emojis: 🌾 ☀👨🏼‍🌾🏃🏼‍♂️🤙🏽🔍"), + getParagraph(1)->getString()); +} + CPPUNIT_PLUGIN_IMPLEMENT(); /* vim:set shiftwidth=4 softtabstop=4 expandtab: */