i18nutil/source/utility/unicode.cxx |   33 ++++++++++++++-------------------
 include/i18nutil/unicode.hxx        |   10 ++--------
 sw/qa/extras/uiwriter/uiwriter7.cxx |   24 ++++++++++++++++++++++--
 sw/source/uibase/shells/textsh1.cxx |   10 ++++++----
 4 files changed, 44 insertions(+), 33 deletions(-)

New commits:
commit b54f188b38b2fdb7e3224e5826162d7186c23288
Author:     Mike Kaganski <mike.kagan...@collabora.com>
AuthorDate: Wed Aug 28 18:23:55 2024 +0500
Commit:     Mike Kaganski <mike.kagan...@collabora.com>
CommitDate: Wed Aug 28 20:52:12 2024 +0200

    Drop newly unused ToggleUnicodeCodepoint::CharsToDelete
    
    Change-Id: I25654d911b1cc11f8c0e7d0ef3e87898418c3408
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/172535
    Tested-by: Jenkins
    Reviewed-by: Mike Kaganski <mike.kagan...@collabora.com>

diff --git a/i18nutil/source/utility/unicode.cxx b/i18nutil/source/utility/unicode.cxx
index ae40fa421c0a..a1e03bd5b41e 100644
--- a/i18nutil/source/utility/unicode.cxx
+++ b/i18nutil/source/utility/unicode.cxx
@@ -1270,19 +1270,6 @@ OUString ToggleUnicodeCodepoint::StringToReplace()
     return maInput.toString();
 }
 
-sal_uInt32 ToggleUnicodeCodepoint::CharsToDelete()
-{
-    OUString sIn = StringToReplace();
-    sal_Int32 nPos = 0;
-    sal_uInt32 counter = 0;
-    while( nPos < sIn.getLength() )
-    {
-        sIn.iterateCodePoints(&nPos);
-        ++counter;
-    }
-    return counter;
-}
-
 OUString ToggleUnicodeCodepoint::ReplacementString()
 {
     OUString sIn = StringToReplace();
diff --git a/include/i18nutil/unicode.hxx b/include/i18nutil/unicode.hxx
index b7722fd77196..ddb75784784f 100644
--- a/include/i18nutil/unicode.hxx
+++ b/include/i18nutil/unicode.hxx
@@ -115,12 +115,6 @@ public:
     */
     OUString StringToReplace();
     OUString ReplacementString();
-
-    /**
-    While sInput.getLength() returns the number of utf16 units to delete,
-        this function returns the number of "characters" to delete - 
potentially a smaller number
-    */
-    sal_uInt32 CharsToDelete();
 };
 
 #endif
commit ac0117713e6ce248c7725b57d709390f27dc4e62
Author:     Mike Kaganski <mike.kagan...@collabora.com>
AuthorDate: Wed Aug 28 18:07:56 2024 +0500
Commit:     Mike Kaganski <mike.kagan...@collabora.com>
CommitDate: Wed Aug 28 20:52:04 2024 +0200

    tdf#162657: re-check the result of combining surrogates
    
    They may constitute a combining mark, and require more characters
    to the left to be processed.
    
    Change-Id: Ie626badf66edaf44c56e0ba2d294304f139d2bca
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/172533
    Tested-by: Jenkins
    Reviewed-by: Mike Kaganski <mike.kagan...@collabora.com>

diff --git a/i18nutil/source/utility/unicode.cxx b/i18nutil/source/utility/unicode.cxx
index b0e1cd4d85ef..ae40fa421c0a 100644
--- a/i18nutil/source/utility/unicode.cxx
+++ b/i18nutil/source/utility/unicode.cxx
@@ -1068,7 +1068,7 @@ OUString unicode::formatPercent(double dNumber,
     return aRet;
 }
 
-bool ToggleUnicodeCodepoint::AllowMoreInput(sal_Unicode uChar)
+bool ToggleUnicodeCodepoint::AllowMoreInput(sal_uInt32 uChar)
 {
     //arbitrarily chosen maximum length allowed - normal max usage would be 
around 30.
     if( maInput.getLength() > 255 )
@@ -1092,12 +1092,20 @@ bool ToggleUnicodeCodepoint::AllowMoreInput(sal_Unicode uChar)
 
             if( rtl::isLowSurrogate(uChar) && maUtf16.isEmpty() && 
maInput.isEmpty()  )
             {
-                maUtf16.append(uChar);
+                maUtf16.append(sal_Unicode(uChar));
                 return true;
             }
             if( rtl::isHighSurrogate(uChar) && maInput.isEmpty() )
-                maUtf16.insert(0, uChar );
-            //end of hex strings, or unexpected order of high/low, so don't 
accept more
+                maUtf16.insert(0, sal_Unicode(uChar));
+            if (maUtf16.getLength() == 2)
+            {
+                assert(rtl::isHighSurrogate(maUtf16[0]) && 
rtl::isLowSurrogate(maUtf16[1]));
+                // The resulting codepoint may itself be combining, so may 
allow more
+                sal_uInt32 nUCS4 = rtl::combineSurrogates(maUtf16[0], 
maUtf16[1]);
+                maUtf16.setLength(0);
+                return AllowMoreInput(nUCS4);
+            }
+            // unexpected order of high/low, so don't accept more
             if( !maUtf16.isEmpty() )
                 maInput.append(maUtf16);
             if( !maCombining.isEmpty() )
@@ -1122,7 +1130,7 @@ bool ToggleUnicodeCodepoint::AllowMoreInput(sal_Unicode uChar)
                 mbAllowMoreChars = false;
                 return false;
             }
-            maCombining.insert(0, uChar);
+            maCombining.insertUtf32(0, uChar);
             break;
 
         default:
@@ -1138,7 +1146,7 @@ bool ToggleUnicodeCodepoint::AllowMoreInput(sal_Unicode uChar)
 
             if( !maCombining.isEmpty() )
             {
-                maCombining.insert(0, uChar);
+                maCombining.insertUtf32(0, uChar);
                 maInput = maCombining;
                 mbAllowMoreChars = false;
                 return false;
diff --git a/include/i18nutil/unicode.hxx b/include/i18nutil/unicode.hxx
index be08595e0b10..b7722fd77196 100644
--- a/include/i18nutil/unicode.hxx
+++ b/include/i18nutil/unicode.hxx
@@ -102,11 +102,11 @@ private:
 
 public:
     /**
-    Build an input string of valid UTF16 units to toggle.
+    Build an input string of valid UTF16/UCS4 units to toggle.
         -do not call the other functions until the input process is complete
         -build string from Right to Left.  (Start from the character to the 
left of the cursor: move left.)
     */
-    bool AllowMoreInput(sal_Unicode uChar);
+    bool AllowMoreInput(sal_uInt32 uChar);
 
     /**
     Validates (and potentially modifies) the input string.
diff --git a/sw/qa/extras/uiwriter/uiwriter7.cxx b/sw/qa/extras/uiwriter/uiwriter7.cxx
index 4b3f1c9c533f..18c7fefd3248 100644
--- a/sw/qa/extras/uiwriter/uiwriter7.cxx
+++ b/sw/qa/extras/uiwriter/uiwriter7.cxx
@@ -2405,8 +2405,8 @@ CPPUNIT_TEST_FIXTURE(SwUiWriterTest7, testUnicodeNotationToggle)
     sDocString = 
pWrtShell->GetCursor()->GetPointNode().GetTextNode()->GetText();
     CPPUNIT_ASSERT_EQUAL(sOriginalDocString, sDocString);
 
-    constexpr OUString sWithCombiningSMPName = u"xyzU+e0101"_ustr;
-    constexpr OUString sWithCombiningSMP = u"xyz\U000e0101"_ustr;
+    constexpr OUString sWithCombiningSMPName = u"xyzU+4faeU+e0101"_ustr;
+    constexpr OUString sWithCombiningSMP = u"xyz\U00004fae\U000e0101"_ustr;
     pWrtShell->SplitNode();
     pWrtShell->Insert2(sWithCombiningSMPName);
     dispatchCommand(mxComponent, u".uno:UnicodeNotationToggle"_ustr, 
aPropertyValues);
@@ -2416,9 +2416,13 @@ CPPUNIT_TEST_FIXTURE(SwUiWriterTest7, testUnicodeNotationToggle)
     dispatchCommand(mxComponent, u".uno:UnicodeNotationToggle"_ustr, 
aPropertyValues);
     sDocString = 
pWrtShell->GetCursor()->GetPointNode().GetTextNode()->GetText();
     // Before tdf#162656 fix, this failed with
-    // - Expected: xyzU+e0101
-    // - Actual  : xyU+e0101
+    // - Expected: xyzU+4faeU+e0101
+    // - Actual  : xyzU+e0101
     // i.e., one codepoint to the left of the combining codepoint was removed
+    // Before tdf#162657 fix, this failed with
+    // - Expected: xyzU+4faeU+e0101
+    // - Actual  : xyz侮U+e0101
+    // i.e., one codepoint to the left of the combining codepoint was not 
converted
     CPPUNIT_ASSERT_EQUAL(sWithCombiningSMPName, sDocString);
 }
 
commit 5f1579c6fe56960f529e96f629d9a37352752345
Author:     Mike Kaganski <mike.kagan...@collabora.com>
AuthorDate: Wed Aug 28 17:18:09 2024 +0500
Commit:     Mike Kaganski <mike.kagan...@collabora.com>
CommitDate: Wed Aug 28 20:51:59 2024 +0200

    tdf#162656: make sure to replace correct range
    
    ToggleUnicodeCodepoint::CharsToDelete returns the number of codepoints,
    while SwWrtShell::DelLeft operates on glyphs, each of which may consist
    of several combining codepoints.
    
    Change-Id: I394ee76c576f2e6f921cb30d125cce2f83559608
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/172532
    Tested-by: Jenkins
    Reviewed-by: Mike Kaganski <mike.kagan...@collabora.com>

diff --git a/sw/qa/extras/uiwriter/uiwriter7.cxx b/sw/qa/extras/uiwriter/uiwriter7.cxx
index 1070f5f26652..4b3f1c9c533f 100644
--- a/sw/qa/extras/uiwriter/uiwriter7.cxx
+++ b/sw/qa/extras/uiwriter/uiwriter7.cxx
@@ -2399,11 +2399,27 @@ CPPUNIT_TEST_FIXTURE(SwUiWriterTest7, testUnicodeNotationToggle)
     dispatchCommand(mxComponent, u".uno:UnicodeNotationToggle"_ustr, 
aPropertyValues);
     sExpectedString = "u+";
     sDocString = 
pWrtShell->GetCursor()->GetPointNode().GetTextNode()->GetText();
-    CPPUNIT_ASSERT_EQUAL(sDocString, sExpectedString);
+    CPPUNIT_ASSERT_EQUAL(sExpectedString, sDocString);
 
     dispatchCommand(mxComponent, u".uno:UnicodeNotationToggle"_ustr, 
aPropertyValues);
     sDocString = 
pWrtShell->GetCursor()->GetPointNode().GetTextNode()->GetText();
-    CPPUNIT_ASSERT_EQUAL(sDocString, sOriginalDocString);
+    CPPUNIT_ASSERT_EQUAL(sOriginalDocString, sDocString);
+
+    constexpr OUString sWithCombiningSMPName = u"xyzU+e0101"_ustr;
+    constexpr OUString sWithCombiningSMP = u"xyz\U000e0101"_ustr;
+    pWrtShell->SplitNode();
+    pWrtShell->Insert2(sWithCombiningSMPName);
+    dispatchCommand(mxComponent, u".uno:UnicodeNotationToggle"_ustr, 
aPropertyValues);
+    sDocString = 
pWrtShell->GetCursor()->GetPointNode().GetTextNode()->GetText();
+    CPPUNIT_ASSERT_EQUAL(sWithCombiningSMP, sDocString);
+
+    dispatchCommand(mxComponent, u".uno:UnicodeNotationToggle"_ustr, 
aPropertyValues);
+    sDocString = 
pWrtShell->GetCursor()->GetPointNode().GetTextNode()->GetText();
+    // Before tdf#162656 fix, this failed with
+    // - Expected: xyzU+e0101
+    // - Actual  : xyU+e0101
+    // i.e., one codepoint to the left of the combining codepoint was removed
+    CPPUNIT_ASSERT_EQUAL(sWithCombiningSMPName, sDocString);
 }
 
 CPPUNIT_TEST_FIXTURE(SwUiWriterTest7, testTdf34957)
diff --git a/sw/source/uibase/shells/textsh1.cxx b/sw/source/uibase/shells/textsh1.cxx
index 9b11e6f1b4ef..fa0a4fad018f 100644
--- a/sw/source/uibase/shells/textsh1.cxx
+++ b/sw/source/uibase/shells/textsh1.cxx
@@ -822,8 +822,9 @@ void SwTextShell::Execute(SfxRequest &rReq)
                     rWrtSh.InfoReadOnlyDialog(false);
                     break;
                 }
+                OUString stringToReplace = aToggle.StringToReplace();
                 SwRewriter aRewriter;
-                aRewriter.AddRule( UndoArg1, aToggle.StringToReplace() );
+                aRewriter.AddRule( UndoArg1, stringToReplace );
                 aRewriter.AddRule( UndoArg2, SwResId(STR_YIELDS) );
                 aRewriter.AddRule( UndoArg3, sReplacement );
                 rWrtSh.StartUndo(SwUndoId::REPLACE, &aRewriter);
@@ -832,9 +833,10 @@ void SwTextShell::Execute(SfxRequest &rReq)
                 rWrtSh.ClearMark();
                 if( rWrtSh.IsInSelect() )  // cancel any in-progress keyboard 
selection as well
                     rWrtSh.EndSelect();
-
-                for( sal_uInt32 i=aToggle.CharsToDelete(); i > 0; --i )
-                    rWrtSh.DelLeft();
+                // Select exactly what was chosen for replacement
+                rWrtSh.GetCursor()->SetMark();
+                
rWrtSh.GetCursor()->GetPoint()->AdjustContent(-stringToReplace.getLength());
+                rWrtSh.DelLeft();
                 rWrtSh.Insert2( sReplacement );
                 rWrtSh.EndUndo(SwUndoId::REPLACE, &aRewriter);
             }

Reply via email to