 i18nutil/source/utility/unicode.cxx | 4 ++--
 sw/qa/extras/uiwriter/uiwriter7.cxx | 13 +++++++++++++
 2 files changed, 15 insertions(+), 2 deletions(-)
New commits:
commit 6d78861299331fdfa51a070ced1ce498ceff342f
Author: Mike Kaganski <mike.kagan...@collabora.com>
AuthorDate: Sun Feb 2 14:37:34 2025 +0500
Commit: Mike Kaganski <mike.kagan...@collabora.com>
CommitDate: Sun Feb 2 12:01:13 2025 +0100

    tdf#164989: Disallow combining characters when a hex already appeared

    We either expect a single (maybe combined) character to convert into
    "U+" notation, or a sequence of hexadecimal numbers (possibly with U+)
    for conversion to characters. If we already saw a hexadecimal ASCII
    character, it can't be preceded by a character combining with it, so
    stop as soon as such an unexpected character appears.

    Change-Id: Ic480fe8f173240eb263d5a77286b149c933049a6
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/181007
    Tested-by: Jenkins
    Reviewed-by: Mike Kaganski <mike.kagan...@collabora.com>

diff --git a/i18nutil/source/utility/unicode.cxx b/i18nutil/source/utility/unicode.cxx
index d2d54e53623c..c9bfbeeb0a80 100644
--- a/i18nutil/source/utility/unicode.cxx
+++ b/i18nutil/source/utility/unicode.cxx
@@ -1065,7 +1065,7 @@ bool ToggleUnicodeCodepoint::AllowMoreInput(sal_uInt32 uChar)
     switch ( unicode::getUnicodeType(uChar) )
     {
         case css::i18n::UnicodeType::SURROGATE:
-            if( bPreventNonHex )
+            if (bPreventNonHex || mbIsHexString)
             {
                 mbAllowMoreChars = false;
                 return false;
@@ -1096,7 +1096,7 @@ bool ToggleUnicodeCodepoint::AllowMoreInput(sal_uInt32 uChar)
 
         case css::i18n::UnicodeType::NON_SPACING_MARK:
         case css::i18n::UnicodeType::COMBINING_SPACING_MARK:
-            if( bPreventNonHex )
+            if (bPreventNonHex || mbIsHexString)
             {
                 mbAllowMoreChars = false;
                 return false;
diff --git a/sw/qa/extras/uiwriter/uiwriter7.cxx b/sw/qa/extras/uiwriter/uiwriter7.cxx
index 909d31d70249..abb115312b0b 100644
--- a/sw/qa/extras/uiwriter/uiwriter7.cxx
+++ b/sw/qa/extras/uiwriter/uiwriter7.cxx
@@ -2411,6 +2411,19 @@ CPPUNIT_TEST_FIXTURE(SwUiWriterTest7, testUnicodeNotationToggle)
     // - Actual : xyz侮U+e0101
     // i.e., one codepoint to the left of the combining codepoint was not converted
     CPPUNIT_ASSERT_EQUAL(sWithCombiningSMPName, sDocString);
+
+    pWrtShell->SplitNode();
+    // Given a combined character "è", consisting of U+0065 and U+0300, followed by a HEX
+    // without a U+ for the conversion into the next character "n"
+    pWrtShell->Insert2(u"è006E"_ustr);
+    dispatchCommand(mxComponent, u".uno:UnicodeNotationToggle"_ustr, aPropertyValues);
+    sDocString = pWrtShell->GetCursor()->GetPointNode().GetTextNode()->GetText();
+    // Before tdf#164989 fix, this failed with
+    // - Expected: èn
+    // - Actual : è006U+0300
+    // i.e., it converted the last combined character *before* the HEX code *to HEX*, replacing
+    // the last character of the HEX; not the expected conversion of the code itself *from HEX*.
+    CPPUNIT_ASSERT_EQUAL(u"\u0065\u0300n"_ustr, sDocString);
 }
 
 CPPUNIT_TEST_FIXTURE(SwUiWriterTest7, testTdf34957)