 i18nutil/source/utility/unicode.cxx | 33 ++++++++++++++-------------------
 include/i18nutil/unicode.hxx        | 10 ++--------
 sw/qa/extras/uiwriter/uiwriter7.cxx | 24 ++++++++++++++++++++++--
 sw/source/uibase/shells/textsh1.cxx | 10 ++++++----
 4 files changed, 44 insertions(+), 33 deletions(-)
New commits: commit b54f188b38b2fdb7e3224e5826162d7186c23288 Author: Mike Kaganski <mike.kagan...@collabora.com> AuthorDate: Wed Aug 28 18:23:55 2024 +0500 Commit: Mike Kaganski <mike.kagan...@collabora.com> CommitDate: Wed Aug 28 20:52:12 2024 +0200 Drop newly unused ToggleUnicodeCodepoint::CharsToDelete Change-Id: I25654d911b1cc11f8c0e7d0ef3e87898418c3408 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/172535 Tested-by: Jenkins Reviewed-by: Mike Kaganski <mike.kagan...@collabora.com> diff --git a/i18nutil/source/utility/unicode.cxx b/i18nutil/source/utility/unicode.cxx index ae40fa421c0a..a1e03bd5b41e 100644 --- a/i18nutil/source/utility/unicode.cxx +++ b/i18nutil/source/utility/unicode.cxx @@ -1270,19 +1270,6 @@ OUString ToggleUnicodeCodepoint::StringToReplace() return maInput.toString(); } -sal_uInt32 ToggleUnicodeCodepoint::CharsToDelete() -{ - OUString sIn = StringToReplace(); - sal_Int32 nPos = 0; - sal_uInt32 counter = 0; - while( nPos < sIn.getLength() ) - { - sIn.iterateCodePoints(&nPos); - ++counter; - } - return counter; -} - OUString ToggleUnicodeCodepoint::ReplacementString() { OUString sIn = StringToReplace(); diff --git a/include/i18nutil/unicode.hxx b/include/i18nutil/unicode.hxx index b7722fd77196..ddb75784784f 100644 --- a/include/i18nutil/unicode.hxx +++ b/include/i18nutil/unicode.hxx @@ -115,12 +115,6 @@ public: */ OUString StringToReplace(); OUString ReplacementString(); - - /** - While sInput.getLength() returns the number of utf16 units to delete, - this function returns the number of "characters" to delete - potentially a smaller number - */ - sal_uInt32 CharsToDelete(); }; #endif commit ac0117713e6ce248c7725b57d709390f27dc4e62 Author: Mike Kaganski <mike.kagan...@collabora.com> AuthorDate: Wed Aug 28 18:07:56 2024 +0500 Commit: Mike Kaganski <mike.kagan...@collabora.com> CommitDate: Wed Aug 28 20:52:04 2024 +0200 tdf#162657: re-check the result of combining surrogates They may constitute a combining mark, and require more characters 
to the left to be processed. Change-Id: Ie626badf66edaf44c56e0ba2d294304f139d2bca Reviewed-on: https://gerrit.libreoffice.org/c/core/+/172533 Tested-by: Jenkins Reviewed-by: Mike Kaganski <mike.kagan...@collabora.com> diff --git a/i18nutil/source/utility/unicode.cxx b/i18nutil/source/utility/unicode.cxx index b0e1cd4d85ef..ae40fa421c0a 100644 --- a/i18nutil/source/utility/unicode.cxx +++ b/i18nutil/source/utility/unicode.cxx @@ -1068,7 +1068,7 @@ OUString unicode::formatPercent(double dNumber, return aRet; } -bool ToggleUnicodeCodepoint::AllowMoreInput(sal_Unicode uChar) +bool ToggleUnicodeCodepoint::AllowMoreInput(sal_uInt32 uChar) { //arbitrarily chosen maximum length allowed - normal max usage would be around 30. if( maInput.getLength() > 255 ) @@ -1092,12 +1092,20 @@ bool ToggleUnicodeCodepoint::AllowMoreInput(sal_Unicode uChar) if( rtl::isLowSurrogate(uChar) && maUtf16.isEmpty() && maInput.isEmpty() ) { - maUtf16.append(uChar); + maUtf16.append(sal_Unicode(uChar)); return true; } if( rtl::isHighSurrogate(uChar) && maInput.isEmpty() ) - maUtf16.insert(0, uChar ); - //end of hex strings, or unexpected order of high/low, so don't accept more + maUtf16.insert(0, sal_Unicode(uChar)); + if (maUtf16.getLength() == 2) + { + assert(rtl::isHighSurrogate(maUtf16[0]) && rtl::isLowSurrogate(maUtf16[1])); + // The resulting codepoint may itself be combining, so may allow more + sal_uInt32 nUCS4 = rtl::combineSurrogates(maUtf16[0], maUtf16[1]); + maUtf16.setLength(0); + return AllowMoreInput(nUCS4); + } + // unexpected order of high/low, so don't accept more if( !maUtf16.isEmpty() ) maInput.append(maUtf16); if( !maCombining.isEmpty() ) @@ -1122,7 +1130,7 @@ bool ToggleUnicodeCodepoint::AllowMoreInput(sal_Unicode uChar) mbAllowMoreChars = false; return false; } - maCombining.insert(0, uChar); + maCombining.insertUtf32(0, uChar); break; default: @@ -1138,7 +1146,7 @@ bool ToggleUnicodeCodepoint::AllowMoreInput(sal_Unicode uChar) if( !maCombining.isEmpty() ) { - 
maCombining.insert(0, uChar); + maCombining.insertUtf32(0, uChar); maInput = maCombining; mbAllowMoreChars = false; return false; diff --git a/include/i18nutil/unicode.hxx b/include/i18nutil/unicode.hxx index be08595e0b10..b7722fd77196 100644 --- a/include/i18nutil/unicode.hxx +++ b/include/i18nutil/unicode.hxx @@ -102,11 +102,11 @@ private: public: /** - Build an input string of valid UTF16 units to toggle. + Build an input string of valid UTF16/UCS4 units to toggle. -do not call the other functions until the input process is complete -build string from Right to Left. (Start from the character to the left of the cursor: move left.) */ - bool AllowMoreInput(sal_Unicode uChar); + bool AllowMoreInput(sal_uInt32 uChar); /** Validates (and potentially modifies) the input string. diff --git a/sw/qa/extras/uiwriter/uiwriter7.cxx b/sw/qa/extras/uiwriter/uiwriter7.cxx index 4b3f1c9c533f..18c7fefd3248 100644 --- a/sw/qa/extras/uiwriter/uiwriter7.cxx +++ b/sw/qa/extras/uiwriter/uiwriter7.cxx @@ -2405,8 +2405,8 @@ CPPUNIT_TEST_FIXTURE(SwUiWriterTest7, testUnicodeNotationToggle) sDocString = pWrtShell->GetCursor()->GetPointNode().GetTextNode()->GetText(); CPPUNIT_ASSERT_EQUAL(sOriginalDocString, sDocString); - constexpr OUString sWithCombiningSMPName = u"xyzU+e0101"_ustr; - constexpr OUString sWithCombiningSMP = u"xyz\U000e0101"_ustr; + constexpr OUString sWithCombiningSMPName = u"xyzU+4faeU+e0101"_ustr; + constexpr OUString sWithCombiningSMP = u"xyz\U00004fae\U000e0101"_ustr; pWrtShell->SplitNode(); pWrtShell->Insert2(sWithCombiningSMPName); dispatchCommand(mxComponent, u".uno:UnicodeNotationToggle"_ustr, aPropertyValues); @@ -2416,9 +2416,13 @@ CPPUNIT_TEST_FIXTURE(SwUiWriterTest7, testUnicodeNotationToggle) dispatchCommand(mxComponent, u".uno:UnicodeNotationToggle"_ustr, aPropertyValues); sDocString = pWrtShell->GetCursor()->GetPointNode().GetTextNode()->GetText(); // Before tdf#162656 fix, this failed with - // - Expected: xyzU+e0101 - // - Actual : xyU+e0101 + // - 
Expected: xyzU+4faeU+e0101 + // - Actual : xyxU+e0101 // i.e., one codepoint to the left of the combining codepoint was removed + // Before tdf#162657 fix, this failed with + // - Expected: xyzU+4faeU+e0101 + // - Actual : xyz侮U+e0101 + // i.e., one codepoint to the left of the combining codepoint was not converted CPPUNIT_ASSERT_EQUAL(sWithCombiningSMPName, sDocString); } commit 5f1579c6fe56960f529e96f629d9a37352752345 Author: Mike Kaganski <mike.kagan...@collabora.com> AuthorDate: Wed Aug 28 17:18:09 2024 +0500 Commit: Mike Kaganski <mike.kagan...@collabora.com> CommitDate: Wed Aug 28 20:51:59 2024 +0200 tdf#162656: make sure to replace correct range ToggleUnicodeCodepoint::CharsToDelete returns number of codepoints, while SwWrtShell::DelLeft operates glyphs, which may consist of a number of combining codepoints. Change-Id: I394ee76c576f2e6f921cb30d125cce2f83559608 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/172532 Tested-by: Jenkins Reviewed-by: Mike Kaganski <mike.kagan...@collabora.com> diff --git a/sw/qa/extras/uiwriter/uiwriter7.cxx b/sw/qa/extras/uiwriter/uiwriter7.cxx index 1070f5f26652..4b3f1c9c533f 100644 --- a/sw/qa/extras/uiwriter/uiwriter7.cxx +++ b/sw/qa/extras/uiwriter/uiwriter7.cxx @@ -2399,11 +2399,27 @@ CPPUNIT_TEST_FIXTURE(SwUiWriterTest7, testUnicodeNotationToggle) dispatchCommand(mxComponent, u".uno:UnicodeNotationToggle"_ustr, aPropertyValues); sExpectedString = "u+"; sDocString = pWrtShell->GetCursor()->GetPointNode().GetTextNode()->GetText(); - CPPUNIT_ASSERT_EQUAL(sDocString, sExpectedString); + CPPUNIT_ASSERT_EQUAL(sExpectedString, sDocString); dispatchCommand(mxComponent, u".uno:UnicodeNotationToggle"_ustr, aPropertyValues); sDocString = pWrtShell->GetCursor()->GetPointNode().GetTextNode()->GetText(); - CPPUNIT_ASSERT_EQUAL(sDocString, sOriginalDocString); + CPPUNIT_ASSERT_EQUAL(sOriginalDocString, sDocString); + + constexpr OUString sWithCombiningSMPName = u"xyzU+e0101"_ustr; + constexpr OUString sWithCombiningSMP = 
u"xyz\U000e0101"_ustr; + pWrtShell->SplitNode(); + pWrtShell->Insert2(sWithCombiningSMPName); + dispatchCommand(mxComponent, u".uno:UnicodeNotationToggle"_ustr, aPropertyValues); + sDocString = pWrtShell->GetCursor()->GetPointNode().GetTextNode()->GetText(); + CPPUNIT_ASSERT_EQUAL(sWithCombiningSMP, sDocString); + + dispatchCommand(mxComponent, u".uno:UnicodeNotationToggle"_ustr, aPropertyValues); + sDocString = pWrtShell->GetCursor()->GetPointNode().GetTextNode()->GetText(); + // Before tdf#162656 fix, this failed with + // - Expected: xyzU+e0101 + // - Actual : xyU+e0101 + // i.e., one codepoint to the left of the combining codepoint was removed + CPPUNIT_ASSERT_EQUAL(sWithCombiningSMPName, sDocString); } CPPUNIT_TEST_FIXTURE(SwUiWriterTest7, testTdf34957) diff --git a/sw/source/uibase/shells/textsh1.cxx b/sw/source/uibase/shells/textsh1.cxx index 9b11e6f1b4ef..fa0a4fad018f 100644 --- a/sw/source/uibase/shells/textsh1.cxx +++ b/sw/source/uibase/shells/textsh1.cxx @@ -822,8 +822,9 @@ void SwTextShell::Execute(SfxRequest &rReq) rWrtSh.InfoReadOnlyDialog(false); break; } + OUString stringToReplace = aToggle.StringToReplace(); SwRewriter aRewriter; - aRewriter.AddRule( UndoArg1, aToggle.StringToReplace() ); + aRewriter.AddRule( UndoArg1, stringToReplace ); aRewriter.AddRule( UndoArg2, SwResId(STR_YIELDS) ); aRewriter.AddRule( UndoArg3, sReplacement ); rWrtSh.StartUndo(SwUndoId::REPLACE, &aRewriter); @@ -832,9 +833,10 @@ void SwTextShell::Execute(SfxRequest &rReq) rWrtSh.ClearMark(); if( rWrtSh.IsInSelect() ) // cancel any in-progress keyboard selection as well rWrtSh.EndSelect(); - - for( sal_uInt32 i=aToggle.CharsToDelete(); i > 0; --i ) - rWrtSh.DelLeft(); + // Select exactly what was chosen for replacement + rWrtSh.GetCursor()->SetMark(); + rWrtSh.GetCursor()->GetPoint()->AdjustContent(-stringToReplace.getLength()); + rWrtSh.DelLeft(); rWrtSh.Insert2( sReplacement ); rWrtSh.EndUndo(SwUndoId::REPLACE, &aRewriter); }