core.git: Branch 'libreoffice-25-8' - i18nutil/source include/i18nutil sw/qa

Mike Kaganski (via logerrit) Fri, 13 Jun 2025 22:49:27 -0700

 i18nutil/source/utility/unicode.cxx |   52 +++++++++++++++---------------------
 include/i18nutil/unicode.hxx        |    4 ++
 sw/qa/extras/uiwriter/uiwriter7.cxx |   16 +++++++++++
 3 files changed, 42 insertions(+), 30 deletions(-)


New commits:
commit 54ea623c76f5f739b56f820d52a0a784e3e73af6
Author:     Mike Kaganski <mike.kagan...@collabora.com>
AuthorDate: Tue Jun 10 22:13:43 2025 +0200
Commit:     Adolfo Jayme Barrientos <fit...@ubuntu.com>
CommitDate: Sat Jun 14 07:49:07 2025 +0200

    tdf#166943: zero passed to AllowMoreInput means "no more input"
    
    Users of ToggleUnicodeCodepoint keep reading next characters to the left,
    until AllowMoreInput returns false (or selection ends). When attempting to
    read more characters to the left than there are, SwCursorShell::GetChar
    returns 0. This needs to be treated by AllowMoreInput as a hard stop. It
    failed when the previous character was a combining character.
    
    Change-Id: I203b150154e1948d4cebfd69442e30a076710f46
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/186341
    Tested-by: Jenkins
    Reviewed-by: Mike Kaganski <mike.kagan...@collabora.com>
    (cherry picked from commit 58a7c6ccfd3fa590460dba1ecbdef4483dcd5e08)
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/186350
    Reviewed-by: Adolfo Jayme Barrientos <fit...@ubuntu.com>

diff --git a/i18nutil/source/utility/unicode.cxx 
b/i18nutil/source/utility/unicode.cxx
index e8476dd8bbe6..0f31ad040756 100644
--- a/i18nutil/source/utility/unicode.cxx
+++ b/i18nutil/source/utility/unicode.cxx
@@ -1074,11 +1074,13 @@ OUString unicode::formatPercent(double dNumber,
 
 bool ToggleUnicodeCodepoint::AllowMoreInput(sal_uInt32 uChar)
 {
+    assert(!mbInputEnded);
+
+    if (uChar == 0)
+        return false;
+
     //arbitrarily chosen maximum length allowed - normal max usage would be 
around 30.
     if( maInput.getLength() > 255 )
-        mbAllowMoreChars = false;
-
-    if( !mbAllowMoreChars )
         return false;
 
     bool bPreventNonHex = false;
@@ -1090,7 +1092,6 @@ bool ToggleUnicodeCodepoint::AllowMoreInput(sal_uInt32 
uChar)
         case css::i18n::UnicodeType::SURROGATE:
             if (bPreventNonHex || mbIsHexString)
             {
-                mbAllowMoreChars = false;
                 return false;
             }
 
@@ -1114,14 +1115,12 @@ bool ToggleUnicodeCodepoint::AllowMoreInput(sal_uInt32 
uChar)
                 maInput.append(maUtf16);
             if( !maCombining.isEmpty() )
                 maInput.append(maCombining);
-            mbAllowMoreChars = false;
-            break;
+            return false;
 
         case css::i18n::UnicodeType::NON_SPACING_MARK:
         case css::i18n::UnicodeType::COMBINING_SPACING_MARK:
             if (bPreventNonHex || mbIsHexString)
             {
-                mbAllowMoreChars = false;
                 return false;
             }
 
@@ -1131,7 +1130,6 @@ bool ToggleUnicodeCodepoint::AllowMoreInput(sal_uInt32 
uChar)
                 maInput = maUtf16;
                 if( !maCombining.isEmpty() )
                     maInput.append(maCombining);
-                mbAllowMoreChars = false;
                 return false;
             }
             maCombining.insertUtf32(0, uChar);
@@ -1144,7 +1142,6 @@ bool ToggleUnicodeCodepoint::AllowMoreInput(sal_uInt32 
uChar)
                 maInput = maUtf16;
                 if( !maCombining.isEmpty() )
                     maInput.append(maCombining);
-                mbAllowMoreChars = false;
                 return false;
             }
 
@@ -1152,14 +1149,12 @@ bool ToggleUnicodeCodepoint::AllowMoreInput(sal_uInt32 
uChar)
             {
                 maCombining.insertUtf32(0, uChar);
                 maInput = maCombining;
-                mbAllowMoreChars = false;
                 return false;
             }
 
             // 0 - 1f are control characters.  Do not process those.
             if( uChar < 0x20 )
             {
-                mbAllowMoreChars = false;
                 return false;
             }
 
@@ -1176,36 +1171,36 @@ bool ToggleUnicodeCodepoint::AllowMoreInput(sal_uInt32 
uChar)
                     // treat as a normal character
                     else
                     {
-                        mbAllowMoreChars = false;
                         if( !bPreventNonHex )
                             maInput.insertUtf32(0, uChar);
+                        return false;
                     }
                     break;
                 case '+':
                     // + already found: skip when not U, or edge case of 
+U+xxxx
                     if( mbRequiresU || (maInput.indexOf("U+") == 0) )
-                        mbAllowMoreChars = false;
+                        return false;
                     // hex chars followed by '+' - now require a 'U'
                     else if ( !maInput.isEmpty() )
                         mbRequiresU = true;
                     // treat as a normal character
                     else
                     {
-                        mbAllowMoreChars = false;
                         if( !bPreventNonHex )
                             maInput.insertUtf32(0, uChar);
+                        return false;
                     }
                     break;
                 default:
                     // + already found. Since not U, cancel further input
                     if( mbRequiresU )
-                        mbAllowMoreChars = false;
+                        return false;
                     // maximum digits per notation is 8: only one notation
                     else if( maInput.indexOf("U+") == -1 && 
maInput.getLength() == 8 )
-                        mbAllowMoreChars = false;
+                        return false;
                     // maximum digits per notation is 8: previous notation 
found
                     else if( maInput.indexOf("U+") == 8 )
-                        mbAllowMoreChars = false;
+                        return false;
                     // a hex character. Add to string.
                     else if( rtl::isAsciiHexDigit(uChar) )
                     {
@@ -1215,36 +1210,35 @@ bool ToggleUnicodeCodepoint::AllowMoreInput(sal_uInt32 
uChar)
                     // not a hex character: stop input. keep if it is the 
first input provided
                     else
                     {
-                        mbAllowMoreChars = false;
                         if( maInput.isEmpty() )
                             maInput.insertUtf32(0, uChar);
+                        return false;
                     }
             }
     }
-    return mbAllowMoreChars;
+    return true;
 }
 
 OUString ToggleUnicodeCodepoint::StringToReplace()
 {
+    // this function potentially modifies the input string. No more addition 
of characters
+#ifndef NDEBUG
+    mbInputEnded = true;
+#endif
+
     if( maInput.isEmpty() )
     {
         //edge case - input finished with incomplete low surrogate or 
combining characters without a base
-        if( mbAllowMoreChars )
-        {
-            if( !maUtf16.isEmpty() )
-                maInput = maUtf16;
-            if( !maCombining.isEmpty() )
-                maInput.append(maCombining);
-        }
+        if (!maUtf16.isEmpty())
+            maInput = maUtf16;
+        if (!maCombining.isEmpty())
+            maInput.append(maCombining);
         return maInput.toString();
     }
 
     if( !mbIsHexString )
         return maInput.toString();
 
-    //this function potentially modifies the input string.  Prevent addition 
of further characters
-    mbAllowMoreChars = false;
-
     //validate unicode notation.
     OUString sIn;
     sal_uInt32 nUnicode = 0;
diff --git a/include/i18nutil/unicode.hxx b/include/i18nutil/unicode.hxx
index 0ee7d1c30d37..3c8c59b08db2 100644
--- a/include/i18nutil/unicode.hxx
+++ b/include/i18nutil/unicode.hxx
@@ -96,9 +96,11 @@ private:
     OUStringBuffer maInput;
     OUStringBuffer maUtf16;
     OUStringBuffer maCombining;
-    bool mbAllowMoreChars = true;
     bool mbRequiresU = false;
     bool mbIsHexString = false;
+#ifndef NDEBUG
+    bool mbInputEnded = false;
+#endif
 
 public:
     /**
diff --git a/sw/qa/extras/uiwriter/uiwriter7.cxx 
b/sw/qa/extras/uiwriter/uiwriter7.cxx
index c7a8f36061e5..3607b32a3576 100644
--- a/sw/qa/extras/uiwriter/uiwriter7.cxx
+++ b/sw/qa/extras/uiwriter/uiwriter7.cxx
@@ -2424,6 +2424,22 @@ CPPUNIT_TEST_FIXTURE(SwUiWriterTest7, 
testUnicodeNotationToggle)
     // i.e., it converted the last combined character *before* the HEX code 
*to HEX*, replacing
     // the last character of the HEX; not the expected conversion of the code 
itself *from HEX*.
     CPPUNIT_ASSERT_EQUAL(u"\u0065\u0300n"_ustr, sDocString);
+
+    // When a combining character stands alone in the beginning of a line, 
toggle must not treat
+    // incoming "zero" indicating "there is no more input" as a character to 
combine with the
+    // combining character. Before tdf#166943 fix, it treated zero as such 
character, got input
+    // length of 2, tried to select and replace two characters to the left of 
cursor, and crashed
+    pWrtShell->SelAll();
+    pWrtShell->DelLeft();
+    pWrtShell->Insert2(u"U+0300"_ustr); // A combining diacritic code in the 
beginning of the text
+    dispatchCommand(mxComponent, u".uno:UnicodeNotationToggle"_ustr, 
aPropertyValues);
+    sDocString = 
pWrtShell->GetCursor()->GetPointNode().GetTextNode()->GetText();
+    CPPUNIT_ASSERT_EQUAL(u"\u0300"_ustr, sDocString); // A lone combining 
diacritic
+
+    // Toggle must not crash, and must produce the correct result
+    dispatchCommand(mxComponent, u".uno:UnicodeNotationToggle"_ustr, 
aPropertyValues);
+    sDocString = 
pWrtShell->GetCursor()->GetPointNode().GetTextNode()->GetText();
+    CPPUNIT_ASSERT_EQUAL(u"U+0300"_ustr, sDocString);
 }
 
 CPPUNIT_TEST_FIXTURE(SwUiWriterTest7, testTdf34957)

core.git: Branch 'libreoffice-25-8' - i18nutil/source include/i18nutil sw/qa

Reply via email to