i18nutil/source/utility/unicode.cxx | 52 +++++++++++++++--------------------- include/i18nutil/unicode.hxx | 4 ++ sw/qa/extras/uiwriter/uiwriter7.cxx | 16 +++++++++++ 3 files changed, 42 insertions(+), 30 deletions(-)
New commits: commit 54ea623c76f5f739b56f820d52a0a784e3e73af6 Author: Mike Kaganski <mike.kagan...@collabora.com> AuthorDate: Tue Jun 10 22:13:43 2025 +0200 Commit: Adolfo Jayme Barrientos <fit...@ubuntu.com> CommitDate: Sat Jun 14 07:49:07 2025 +0200 tdf#166943: zero passed to AllowMoreInput means "no more input" Users of ToggleUnicodeCodepoint keep reading next characters to the left, until AllowMoreInput returns false (or selection ends). Attempting to read more characters to the left than there are, SwCursorShell::GetChar returns 0. This needs to be treated by AllowMoreInput as a hard stop. It failed when the previous character was a combining character. Change-Id: I203b150154e1948d4cebfd69442e30a076710f46 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/186341 Tested-by: Jenkins Reviewed-by: Mike Kaganski <mike.kagan...@collabora.com> (cherry picked from commit 58a7c6ccfd3fa590460dba1ecbdef4483dcd5e08) Reviewed-on: https://gerrit.libreoffice.org/c/core/+/186350 Reviewed-by: Adolfo Jayme Barrientos <fit...@ubuntu.com> diff --git a/i18nutil/source/utility/unicode.cxx b/i18nutil/source/utility/unicode.cxx index e8476dd8bbe6..0f31ad040756 100644 --- a/i18nutil/source/utility/unicode.cxx +++ b/i18nutil/source/utility/unicode.cxx @@ -1074,11 +1074,13 @@ OUString unicode::formatPercent(double dNumber, bool ToggleUnicodeCodepoint::AllowMoreInput(sal_uInt32 uChar) { + assert(!mbInputEnded); + + if (uChar == 0) + return false; + //arbitrarily chosen maximum length allowed - normal max usage would be around 30. 
if( maInput.getLength() > 255 ) - mbAllowMoreChars = false; - - if( !mbAllowMoreChars ) return false; bool bPreventNonHex = false; @@ -1090,7 +1092,6 @@ bool ToggleUnicodeCodepoint::AllowMoreInput(sal_uInt32 uChar) case css::i18n::UnicodeType::SURROGATE: if (bPreventNonHex || mbIsHexString) { - mbAllowMoreChars = false; return false; } @@ -1114,14 +1115,12 @@ bool ToggleUnicodeCodepoint::AllowMoreInput(sal_uInt32 uChar) maInput.append(maUtf16); if( !maCombining.isEmpty() ) maInput.append(maCombining); - mbAllowMoreChars = false; - break; + return false; case css::i18n::UnicodeType::NON_SPACING_MARK: case css::i18n::UnicodeType::COMBINING_SPACING_MARK: if (bPreventNonHex || mbIsHexString) { - mbAllowMoreChars = false; return false; } @@ -1131,7 +1130,6 @@ bool ToggleUnicodeCodepoint::AllowMoreInput(sal_uInt32 uChar) maInput = maUtf16; if( !maCombining.isEmpty() ) maInput.append(maCombining); - mbAllowMoreChars = false; return false; } maCombining.insertUtf32(0, uChar); @@ -1144,7 +1142,6 @@ bool ToggleUnicodeCodepoint::AllowMoreInput(sal_uInt32 uChar) maInput = maUtf16; if( !maCombining.isEmpty() ) maInput.append(maCombining); - mbAllowMoreChars = false; return false; } @@ -1152,14 +1149,12 @@ bool ToggleUnicodeCodepoint::AllowMoreInput(sal_uInt32 uChar) { maCombining.insertUtf32(0, uChar); maInput = maCombining; - mbAllowMoreChars = false; return false; } // 0 - 1f are control characters. Do not process those. 
if( uChar < 0x20 ) { - mbAllowMoreChars = false; return false; } @@ -1176,36 +1171,36 @@ bool ToggleUnicodeCodepoint::AllowMoreInput(sal_uInt32 uChar) // treat as a normal character else { - mbAllowMoreChars = false; if( !bPreventNonHex ) maInput.insertUtf32(0, uChar); + return false; } break; case '+': // + already found: skip when not U, or edge case of +U+xxxx if( mbRequiresU || (maInput.indexOf("U+") == 0) ) - mbAllowMoreChars = false; + return false; // hex chars followed by '+' - now require a 'U' else if ( !maInput.isEmpty() ) mbRequiresU = true; // treat as a normal character else { - mbAllowMoreChars = false; if( !bPreventNonHex ) maInput.insertUtf32(0, uChar); + return false; } break; default: // + already found. Since not U, cancel further input if( mbRequiresU ) - mbAllowMoreChars = false; + return false; // maximum digits per notation is 8: only one notation else if( maInput.indexOf("U+") == -1 && maInput.getLength() == 8 ) - mbAllowMoreChars = false; + return false; // maximum digits per notation is 8: previous notation found else if( maInput.indexOf("U+") == 8 ) - mbAllowMoreChars = false; + return false; // a hex character. Add to string. else if( rtl::isAsciiHexDigit(uChar) ) { @@ -1215,36 +1210,35 @@ bool ToggleUnicodeCodepoint::AllowMoreInput(sal_uInt32 uChar) // not a hex character: stop input. keep if it is the first input provided else { - mbAllowMoreChars = false; if( maInput.isEmpty() ) maInput.insertUtf32(0, uChar); + return false; } } } - return mbAllowMoreChars; + return true; } OUString ToggleUnicodeCodepoint::StringToReplace() { + // this function potentially modifies the input string. 
No more addition of characters +#ifndef NDEBUG + mbInputEnded = true; +#endif + if( maInput.isEmpty() ) { //edge case - input finished with incomplete low surrogate or combining characters without a base - if( mbAllowMoreChars ) - { - if( !maUtf16.isEmpty() ) - maInput = maUtf16; - if( !maCombining.isEmpty() ) - maInput.append(maCombining); - } + if (!maUtf16.isEmpty()) + maInput = maUtf16; + if (!maCombining.isEmpty()) + maInput.append(maCombining); return maInput.toString(); } if( !mbIsHexString ) return maInput.toString(); - //this function potentially modifies the input string. Prevent addition of further characters - mbAllowMoreChars = false; - //validate unicode notation. OUString sIn; sal_uInt32 nUnicode = 0; diff --git a/include/i18nutil/unicode.hxx b/include/i18nutil/unicode.hxx index 0ee7d1c30d37..3c8c59b08db2 100644 --- a/include/i18nutil/unicode.hxx +++ b/include/i18nutil/unicode.hxx @@ -96,9 +96,11 @@ private: OUStringBuffer maInput; OUStringBuffer maUtf16; OUStringBuffer maCombining; - bool mbAllowMoreChars = true; bool mbRequiresU = false; bool mbIsHexString = false; +#ifndef NDEBUG + bool mbInputEnded = false; +#endif public: /** diff --git a/sw/qa/extras/uiwriter/uiwriter7.cxx b/sw/qa/extras/uiwriter/uiwriter7.cxx index c7a8f36061e5..3607b32a3576 100644 --- a/sw/qa/extras/uiwriter/uiwriter7.cxx +++ b/sw/qa/extras/uiwriter/uiwriter7.cxx @@ -2424,6 +2424,22 @@ CPPUNIT_TEST_FIXTURE(SwUiWriterTest7, testUnicodeNotationToggle) // i.e., it converted the last combined character *before* the HEX code *to HEX*, replacing // the last character of the HEX; not the expected conversion of the code itself *from HEX*. CPPUNIT_ASSERT_EQUAL(u"\u0065\u0300n"_ustr, sDocString); + + // When a combining character stands alone in the beginning of a line, toggle must not treat + // incoming "zero" indicating "there is no more input" as a character to combine with the + // combining character. 
Before tdf#166943 fix, it treated zero as such character, got input + // length of 2, tried to select and replace two characters to the left of cursor, and crashed + pWrtShell->SelAll(); + pWrtShell->DelLeft(); + pWrtShell->Insert2(u"U+0300"_ustr); // A combining diacritic code in the beginning of the text + dispatchCommand(mxComponent, u".uno:UnicodeNotationToggle"_ustr, aPropertyValues); + sDocString = pWrtShell->GetCursor()->GetPointNode().GetTextNode()->GetText(); + CPPUNIT_ASSERT_EQUAL(u"\u0300"_ustr, sDocString); // A lone combining diacritic + + // Toggle must not crash, and must produce the correct result + dispatchCommand(mxComponent, u".uno:UnicodeNotationToggle"_ustr, aPropertyValues); + sDocString = pWrtShell->GetCursor()->GetPointNode().GetTextNode()->GetText(); + CPPUNIT_ASSERT_EQUAL(u"U+0300"_ustr, sDocString); } CPPUNIT_TEST_FIXTURE(SwUiWriterTest7, testTdf34957)