source

AnamayNarkar (via logerrit) Mon, 02 Mar 2026 22:20:04 -0800

 i18nutil/CppunitTest_i18nutil_unicodeescape.mk |   25 +++++
 i18nutil/Library_i18nutil.mk                   |    1 
 i18nutil/Module_i18nutil.mk                    |    1 
 i18nutil/qa/cppunit/test_unicodeescape.cxx     |  121 +++++++++++++++++++++++++
 i18nutil/source/utility/unicodeescape.cxx      |  102 +++++++++++++++++++++
 include/i18nutil/unicodeescape.hxx             |   31 ++++++
 sc/qa/unit/ucalc.cxx                           |   51 ++++++++++
 sc/source/core/data/table6.cxx                 |    2 
 sw/qa/extras/uiwriter/uiwriter7.cxx            |   52 ++++++++++
 sw/source/core/crsr/findtxt.cxx                |   10 +-
 10 files changed, 395 insertions(+), 1 deletion(-)


New commits:
commit fc800ec1791d5b0b614577b345fbea65933a229b
Author:     AnamayNarkar <[email protected]>
AuthorDate: Mon Mar 2 05:15:23 2026 +0530
Commit:     Mike Kaganski <[email protected]>
CommitDate: Tue Mar 3 07:19:46 2026 +0100

    tdf#106137 sc: process Unicode escapes in regex replacement strings
    
    This uses i18nutil::processUnicodeEscapes(), introduced in the
    parent commit.
    
    Change-Id: Ieb780d494ea3cdb8a976bb4ca33d95baa7127c8e
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/200739
    Tested-by: Jenkins
    Reviewed-by: Mike Kaganski <[email protected]>

diff --git a/sc/qa/unit/ucalc.cxx b/sc/qa/unit/ucalc.cxx
index 34b6f4e2bda0..79c579016ac6 100644
--- a/sc/qa/unit/ucalc.cxx
+++ b/sc/qa/unit/ucalc.cxx
@@ -4460,6 +4460,57 @@ CPPUNIT_TEST_FIXTURE(Test, testSearchCells)
     m_pDoc->DeleteTab(0);
 }
 
+CPPUNIT_TEST_FIXTURE(Test, testTdf106137_UnicodeEscapeInReplacement)
+{
+    m_pDoc->InsertTab(0, u"Test"_ustr);
+
+    // test basic unicode escape expansion
+    // \uhhhh escapes in the replacement string must expand when regular expressions are enabled
+    m_pDoc->SetString(ScAddress(0, 0, 0), u"hello world"_ustr);
+
+    SvxSearchItem aItem(SID_SEARCH_ITEM);
+    aItem.SetSearchString(u"world"_ustr);
+    aItem.SetReplaceString(u"\\u0041\\u0042\\u0043"_ustr);
+    aItem.SetCommand(SvxSearchCmd::REPLACE_ALL);
+    aItem.SetRegExp(true);
+
+    ScMarkData aMarkData(m_pDoc->GetSheetLimits());
+    aMarkData.SelectOneTable(0);
+    SCCOL nCol = 0;
+    SCROW nRow = 0;
+    SCTAB nTab = 0;
+    ScRangeList aMatchedRanges;
+    OUString aUndoStr;
+    bool bClamped = false;
+
+    m_pDoc->SearchAndReplace(aItem, nCol, nRow, nTab, aMarkData, aMatchedRanges, aUndoStr, nullptr,
+                             bClamped);
+    CPPUNIT_ASSERT_EQUAL(u"hello ABC"_ustr, m_pDoc->GetString(ScAddress(0, 0, 0)));
+
+    // test Backreference edge case
+    // \uXXXX in replacement template should expand before back-references are substituted,
+    // so that the 'searched' content containing literal \u sequences is not accidentally expanded
+
+    // Insert text that contains an actual escaped unicode string
+    m_pDoc->SetString(ScAddress(0, 1, 0), u"find \\u0042"_ustr); // A2
+
+    // Capture the literal "\u0042" in a regex group
+    aItem.SetSearchString(u"(find .*)"_ustr);
+    // Replace with \u0041 (which should become 'A') + the backreference
+    aItem.SetReplaceString(u"\\u0041 $1"_ustr);
+
+    nCol = 0;
+    nRow = 0;
+    nTab = 0;
+    m_pDoc->SearchAndReplace(aItem, nCol, nRow, nTab, aMarkData, aMatchedRanges, aUndoStr, nullptr,
+                             bClamped);
+
+    // shouldn't be 'A find B'
+    CPPUNIT_ASSERT_EQUAL(u"A find \\u0042"_ustr, m_pDoc->GetString(ScAddress(0, 1, 0)));
+
+    m_pDoc->DeleteTab(0);
+}
+
 CPPUNIT_TEST_FIXTURE(Test, testFormulaPosition)
 {
     m_pDoc->InsertTab(0, u"Test"_ustr);
diff --git a/sc/source/core/data/table6.cxx b/sc/source/core/data/table6.cxx
index 970ae62e6467..89803bb125cb 100644
--- a/sc/source/core/data/table6.cxx
+++ b/sc/source/core/data/table6.cxx
@@ -32,6 +32,7 @@
 #include <markdata.hxx>
 #include <editutil.hxx>
 #include <postit.hxx>
+#include <i18nutil/unicodeescape.hxx>
 
 namespace {
 
@@ -185,6 +186,7 @@ bool ScTable::SearchCell(const SvxSearchItem& rSearchItem, 
SCCOL nCol, sc::Colum
         OUString sReplStr = rSearchItem.GetReplaceString();
         if (rSearchItem.GetRegExp())
         {
+            sReplStr = i18nutil::processUnicodeEscapes(sReplStr);
             utl::TextSearch::ReplaceBackReferences( sReplStr, aString, 
aSearchResult );
             OUStringBuffer aStrBuffer(aString);
             aStrBuffer.remove(nStart, nEnd-nStart+1);
commit 5c7833fee241d2ec840b0a9998d6900cdd844e58
Author:     AnamayNarkar <[email protected]>
AuthorDate: Sat Feb 21 15:44:14 2026 +0530
Commit:     Mike Kaganski <[email protected]>
CommitDate: Tue Mar 3 07:19:39 2026 +0100

    tdf#106137 sw: process Unicode escapes in regex replacement strings
    
    When using Find & Replace with regular expressions enabled,
    \uhhhh and \Uhhhhhhhh sequences in the replacement string are
    now expanded to their corresponding Unicode characters.
    
    This uses i18nutil::processUnicodeEscapes(), introduced in the
    parent commit.
    
    Change-Id: Ifc2a634e9b5eb2589a46b309441ea7691d2321b7
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/199924
    Tested-by: Jenkins
    Reviewed-by: Mike Kaganski <[email protected]>

diff --git a/sw/qa/extras/uiwriter/uiwriter7.cxx b/sw/qa/extras/uiwriter/uiwriter7.cxx
index c22b02d415e0..cb66eff3471f 100644
--- a/sw/qa/extras/uiwriter/uiwriter7.cxx
+++ b/sw/qa/extras/uiwriter/uiwriter7.cxx
@@ -2961,6 +2961,58 @@ CPPUNIT_TEST_FIXTURE(SwUiWriterTest7, testTdf149089)
     CPPUNIT_ASSERT_EQUAL(nGridWidth1, nGridWidth2);
 }
 
+CPPUNIT_TEST_FIXTURE(SwUiWriterTest7, testTdf106137_UnicodeEscapeInReplacement)
+{
+    // \uhhhh escapes in the replacement string must expand when regular expressions are enabled
+    createSwDoc();
+    SwDoc* pDoc = getSwDoc();
+    SwCursorShell* pShell(pDoc->GetEditShell());
+    CPPUNIT_ASSERT(pShell);
+    SwPaM* pCursor = pShell->GetCursor();
+    IDocumentContentOperations& rIDCO(pDoc->getIDocumentContentOperations());
+
+    rIDCO.InsertString(*pCursor, u"hello world"_ustr);
+
+    uno::Reference<util::XReplaceable> xReplace(mxComponent, uno::UNO_QUERY);
+    uno::Reference<util::XReplaceDescriptor> xReplaceDes = xReplace->createReplaceDescriptor();
+    xReplaceDes->setPropertyValue(u"SearchRegularExpression"_ustr, uno::Any(true));
+
+    xReplaceDes->setSearchString(u"world"_ustr);
+    xReplaceDes->setReplaceString(u"\\u0041\\u0042\\u0043"_ustr);
+    sal_Int32 nCount = xReplace->replaceAll(xReplaceDes);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(1), nCount);
+    CPPUNIT_ASSERT_EQUAL(u"hello ABC"_ustr, pCursor->GetPointNode().GetTextNode()->GetText());
+}
+
+CPPUNIT_TEST_FIXTURE(SwUiWriterTest7, testTdf106137_UnicodeEscapeInReplacement_BackRef)
+{
+    // \uXXXX in replacement template should expand before back-references are substituted,
+    // so that the 'searched' content containing literal \u sequences is not accidentally expanded
+    createSwDoc();
+    SwDoc* pDoc = getSwDoc();
+    SwCursorShell* pShell(pDoc->GetEditShell());
+    CPPUNIT_ASSERT(pShell);
+    SwPaM* pCursor = pShell->GetCursor();
+    IDocumentContentOperations& rIDCO(pDoc->getIDocumentContentOperations());
+
+    // Insert text that contains an actual escaped unicode string
+    rIDCO.InsertString(*pCursor, u"find \\u0042"_ustr);
+
+    uno::Reference<util::XReplaceable> xReplace(mxComponent, uno::UNO_QUERY);
+    uno::Reference<util::XReplaceDescriptor> xReplaceDes = xReplace->createReplaceDescriptor();
+    xReplaceDes->setPropertyValue(u"SearchRegularExpression"_ustr, uno::Any(true));
+
+    // Capture the literal "\u0042" in a regex group
+    xReplaceDes->setSearchString(u"(find .*)"_ustr);
+    // Replace with \u0041 (which should become 'A') + the backreference
+    xReplaceDes->setReplaceString(u"\\u0041 $1"_ustr);
+    sal_Int32 nCount = xReplace->replaceAll(xReplaceDes);
+
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(1), nCount);
+    // shouldn't be 'A find B'
+    CPPUNIT_ASSERT_EQUAL(u"A find \\u0042"_ustr, pCursor->GetPointNode().GetTextNode()->GetText());
+}
+
 CPPUNIT_PLUGIN_IMPLEMENT();
 
 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sw/source/core/crsr/findtxt.cxx b/sw/source/core/crsr/findtxt.cxx
index 07a19bde59b7..264b1261cafa 100644
--- a/sw/source/core/crsr/findtxt.cxx
+++ b/sw/source/core/crsr/findtxt.cxx
@@ -53,6 +53,7 @@
 #include <docsh.hxx>
 #include <PostItMgr.hxx>
 #include <view.hxx>
+#include <i18nutil/unicodeescape.hxx>
 
 using namespace ::com::sun::star;
 using namespace util;
@@ -984,8 +985,14 @@ int SwFindParaText::DoFind(SwPaM & rCursor, 
SwMoveFnCollection const & fnMove,
         std::optional<OUString> xRepl;
         if (bRegExp)
             xRepl = sw::ReplaceBackReferences(m_rSearchOpt, &rCursor, 
m_pLayout);
+
+        // process \uhhhh and \Uhhhhhhhh escapes for regex replacements
+        OUString aFinalReplStr = xRepl ? *xRepl : m_rSearchOpt.replaceString;
+        if (bRegExp && !xRepl) // fallback for when ReplaceBackReferences 
returns null
+            aFinalReplStr = i18nutil::processUnicodeEscapes(aFinalReplStr);
+
         bool const bReplaced = sw::ReplaceImpl(rCursor,
-                xRepl ? *xRepl : m_rSearchOpt.replaceString,
+                aFinalReplStr,
                 bRegExp, m_rCursor.GetDoc(), m_pLayout);
 
         m_rCursor.SaveTableBoxContent( rCursor.GetPoint() );
@@ -1149,6 +1156,7 @@ std::optional<OUString> ReplaceBackReferences(const 
i18nutil::SearchOptions2& rS
             utl::TextSearch aSText(rSearchOpt);
             SearchResult aResult;
             OUString aReplaceStr( rSearchOpt.replaceString );
+            aReplaceStr = i18nutil::processUnicodeEscapes(aReplaceStr);
             if (bParaEnd)
             {
                 static constexpr OUString aStr(u"\n"_ustr);
commit 5936fab3e6293101a536f55904ff2072f2bb133e
Author:     AnamayNarkar <[email protected]>
AuthorDate: Fri Feb 6 02:24:09 2026 +0530
Commit:     Mike Kaganski <[email protected]>
CommitDate: Tue Mar 3 07:19:32 2026 +0100

    tdf#106137 i18nutil: Add helper to process Unicode escapes
    
    Adds a new utility function processUnicodeEscapes() to i18nutil.
    This supports parsing:
    * Standard escapes: \uhhhh (e.g. \u0041)
    * Extended escapes: \Uhhhhhhhh (e.g. \U0001F600)
    * Escaped backslashes: \\ (yields a single \)
    
    Change-Id: I589eb2403376c8cef694f52f1c785d42fba94ff9
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/198777
    Tested-by: Jenkins
    Reviewed-by: Mike Kaganski <[email protected]>

diff --git a/i18nutil/CppunitTest_i18nutil_unicodeescape.mk b/i18nutil/CppunitTest_i18nutil_unicodeescape.mk
new file mode 100644
index 000000000000..7efe55097331
--- /dev/null
+++ b/i18nutil/CppunitTest_i18nutil_unicodeescape.mk
@@ -0,0 +1,25 @@
+# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*-
+#
+# This file is part of the LibreOffice project.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+
+$(eval $(call gb_CppunitTest_CppunitTest,i18nutil_unicodeescape))
+
+$(eval $(call gb_CppunitTest_use_external,i18nutil_unicodeescape,boost_headers))
+
+$(eval $(call gb_CppunitTest_add_exception_objects,i18nutil_unicodeescape, \
+    i18nutil/qa/cppunit/test_unicodeescape \
+))
+
+$(eval $(call gb_CppunitTest_use_libraries,i18nutil_unicodeescape, \
+    cppu \
+    cppuhelper \
+    i18nutil \
+    sal \
+))
+
+# vim: set noet sw=4 ts=4:
diff --git a/i18nutil/Library_i18nutil.mk b/i18nutil/Library_i18nutil.mk
index cf166a476401..4c6923e8a621 100644
--- a/i18nutil/Library_i18nutil.mk
+++ b/i18nutil/Library_i18nutil.mk
@@ -52,6 +52,7 @@ $(eval $(call gb_Library_add_exception_objects,i18nutil,\
        i18nutil/source/utility/scriptclass \
        i18nutil/source/utility/scripttypedetector \
        i18nutil/source/utility/unicode \
+       i18nutil/source/utility/unicodeescape \
        i18nutil/source/utility/widthfolding \
 ))
 
diff --git a/i18nutil/Module_i18nutil.mk b/i18nutil/Module_i18nutil.mk
index 3fac872e83ed..dc9bc57c62cf 100644
--- a/i18nutil/Module_i18nutil.mk
+++ b/i18nutil/Module_i18nutil.mk
@@ -14,6 +14,7 @@ $(eval $(call gb_Module_add_targets,i18nutil,\
 
 $(eval $(call gb_Module_add_check_targets,i18nutil,\
        CppunitTest_i18nutil \
+       CppunitTest_i18nutil_unicodeescape \
 ))
 
 # vim: set noet sw=4:
diff --git a/i18nutil/qa/cppunit/test_unicodeescape.cxx b/i18nutil/qa/cppunit/test_unicodeescape.cxx
new file mode 100644
index 000000000000..2cb098e692b8
--- /dev/null
+++ b/i18nutil/qa/cppunit/test_unicodeescape.cxx
@@ -0,0 +1,121 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; fill-column: 100 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include <cppunit/TestFixture.h>
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/plugin/TestPlugIn.h>
+#include <i18nutil/unicodeescape.hxx>
+#include <o3tl/string_view.hxx>
+
+using namespace o3tl;
+
+class TestUnicodeEscape : public CppUnit::TestFixture
+{
+public:
+    void testBasicUnicodeEscape();
+    void testExtendedUnicodeEscape();
+    void testEscapedBackslash();
+    void testInvalidEscape();
+    void testControlCharFiltering();
+    void testSurrogatePair();
+
+    CPPUNIT_TEST_SUITE(TestUnicodeEscape);
+    CPPUNIT_TEST(testBasicUnicodeEscape);
+    CPPUNIT_TEST(testExtendedUnicodeEscape);
+    CPPUNIT_TEST(testEscapedBackslash);
+    CPPUNIT_TEST(testInvalidEscape);
+    CPPUNIT_TEST(testControlCharFiltering);
+    CPPUNIT_TEST(testSurrogatePair);
+    CPPUNIT_TEST_SUITE_END();
+};
+
+void TestUnicodeEscape::testBasicUnicodeEscape()
+{
+    // \u0041 should become 'A'
+    OUString input(u"\\u0041"_ustr);
+    OUString result = i18nutil::processUnicodeEscapes(input);
+    CPPUNIT_ASSERT_EQUAL(u"A"_ustr, result);
+
+    // Multiple escapes
+    input = u"\\u0041\\u0042\\u0043"_ustr;
+    result = i18nutil::processUnicodeEscapes(input);
+    CPPUNIT_ASSERT_EQUAL(u"ABC"_ustr, result);
+
+    // Mixed with text
+    input = u"Hello \\u0041 World"_ustr;
+    result = i18nutil::processUnicodeEscapes(input);
+    CPPUNIT_ASSERT_EQUAL(u"Hello A World"_ustr, result);
+}
+
+void TestUnicodeEscape::testExtendedUnicodeEscape()
+{
+    // \U00000041 should become 'A'
+    OUString result = i18nutil::processUnicodeEscapes(u"\\U00000041");
+    CPPUNIT_ASSERT_EQUAL(u"A"_ustr, result);
+}
+
+void TestUnicodeEscape::testEscapedBackslash()
+{
+    // \\u0041 (escaped backslash) should become literal \u0041, not 'A'
+    OUString result = i18nutil::processUnicodeEscapes(u"\\\\u0041");
+    CPPUNIT_ASSERT_EQUAL(u"\\u0041"_ustr, result);
+}
+
+void TestUnicodeEscape::testInvalidEscape()
+{
+    // Invalid hex should keep literal
+    OUString input(u"\\uXYZW"_ustr);
+    OUString result = i18nutil::processUnicodeEscapes(input);
+    CPPUNIT_ASSERT_EQUAL(input, result);
+
+    // Incomplete escape should keep literal
+    input = u"\\u004"_ustr;
+    result = i18nutil::processUnicodeEscapes(input);
+    CPPUNIT_ASSERT_EQUAL(input, result);
+}
+
+void TestUnicodeEscape::testControlCharFiltering()
+{
+    // \u000A (LF) should be allowed
+    OUString input(u"\\u000A"_ustr);
+    OUString result = i18nutil::processUnicodeEscapes(input);
+    CPPUNIT_ASSERT_EQUAL(u"\n"_ustr, result);
+
+    // \u0009 (TAB) should be allowed
+    input = u"\\u0009"_ustr;
+    result = i18nutil::processUnicodeEscapes(input);
+    CPPUNIT_ASSERT_EQUAL(u"\t"_ustr, result);
+
+    // \u0000 (NULL) should be filtered
+    input = u"\\u0000"_ustr;
+    result = i18nutil::processUnicodeEscapes(input);
+    CPPUNIT_ASSERT_EQUAL(input, result); // Should remain literal
+
+    input = u"\\\\uXYZW"_ustr;
+    result = i18nutil::processUnicodeEscapes(input);
+
+    CPPUNIT_ASSERT_EQUAL(u"\\uXYZW"_ustr, result);
+}
+
+void TestUnicodeEscape::testSurrogatePair()
+{
+    // \U0001F600 (emoji) should work with surrogate pairs
+    OUString result = i18nutil::processUnicodeEscapes(u"\\U0001F600");
+
+    // U+1F600 = UTF-16: 0xD83D 0xDE00
+    sal_Unicode surrogates[2] = { 0xD83D, 0xDE00 };
+    OUString expected(surrogates, 2);
+
+    CPPUNIT_ASSERT_EQUAL(expected, result);
+}
+
+CPPUNIT_TEST_SUITE_REGISTRATION(TestUnicodeEscape);
+CPPUNIT_PLUGIN_IMPLEMENT();
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab cinoptions=b1,g0,N-s cinkeys+=0=break: */
diff --git a/i18nutil/source/utility/unicodeescape.cxx b/i18nutil/source/utility/unicodeescape.cxx
new file mode 100644
index 000000000000..cf9fee576565
--- /dev/null
+++ b/i18nutil/source/utility/unicodeescape.cxx
@@ -0,0 +1,102 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; fill-column: 100 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include <i18nutil/unicodeescape.hxx>
+#include <rtl/character.hxx>
+#include <rtl/ustrbuf.hxx>
+#include <o3tl/numeric.hxx>
+#include <cassert>
+
+namespace i18nutil
+{
+namespace
+{
+// Parse at most 8 hex digits into a code point; 0xFFFFFFFF signals a character rejected as hex
+sal_uInt32 hexToCodepoint(std::u16string_view hexStr)
+{
+    assert(hexStr.size() <= 8);
+
+    sal_uInt32 result = 0;
+    for (sal_Unicode c : hexStr)
+    {
+        sal_Int32 hexValue = o3tl::convertToHex<sal_Int32>(c);
+        if (hexValue < 0)
+            return 0xFFFFFFFF; // Invalid
+        result = (result << 4) | hexValue;
+    }
+    return result;
+}
+
+// Helper: below 0x20 only TAB, LF and CR pass; otherwise defer to rtl::isUnicodeCodePoint
+bool isAllowedCodepoint(sal_uInt32 cp)
+{
+    if (cp < 0x20)
+    {
+        return cp == 0x09 || cp == 0x0A || cp == 0x0D; // TAB, LF, CR
+    }
+    return rtl::isUnicodeCodePoint(cp);
+}
+
+} // local namespace
+
+OUString processUnicodeEscapes(std::u16string_view input)
+{
+    OUStringBuffer result;
+    sal_Int32 len = input.size();
+
+    for (sal_Int32 i = 0; i < len; ++i)
+    {
+        // Check for backslash and ensure we have at least one char after it
+        if (input[i] == '\\' && i + 1 < len)
+        {
+            sal_Unicode next = input[i + 1];
+
+            // Handle escaped backslash: (backslash)(backslash) -> (backslash)
+            if (next == '\\')
+            {
+                result.append('\\');
+                ++i; // Skip the second backslash
+                continue;
+            }
+
+            // Handle \uhhhh (4 hex digits); i + 5 is the index of the last digit
+            else if (next == 'u' && i + 5 < len)
+            {
+                sal_uInt32 codepoint = hexToCodepoint(input.substr(i + 2, 4));
+                if (isAllowedCodepoint(codepoint))
+                {
+                    result.append(sal_Unicode(codepoint));
+                    i += 5; // Skip \uhhhh
+                    continue;
+                }
+            }
+
+            // Handle \Uhhhhhhhh (8 hex digits); i + 9 is the index of the last digit
+            else if (next == 'U' && i + 9 < len)
+            {
+                sal_uInt32 codepoint = hexToCodepoint(input.substr(i + 2, 8));
+                if (isAllowedCodepoint(codepoint))
+                {
+                    result.appendUtf32(codepoint);
+                    i += 9; // Skip \Uhhhhhhhh
+                    continue;
+                }
+            }
+        }
+
+        // Not a valid or allowed escape, or a regular character: copy it through unchanged
+        result.append(input[i]);
+    }
+
+    return result.makeStringAndClear();
+}
+
+} // namespace i18nutil
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab cinoptions=b1,g0,N-s cinkeys+=0=break: */
diff --git a/include/i18nutil/unicodeescape.hxx b/include/i18nutil/unicodeescape.hxx
new file mode 100644
index 000000000000..064d6d82e380
--- /dev/null
+++ b/include/i18nutil/unicodeescape.hxx
@@ -0,0 +1,31 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#pragma once
+
+#include <rtl/ustring.hxx>
+#include <i18nutil/i18nutildllapi.h>
+
+namespace i18nutil
+{
+/**
+ * Process Unicode escape sequences in a string.
+ * Converts \uhhhh (4 hex digits) and \Uhhhhhhhh (8 hex digits) to their
+ * corresponding Unicode characters.
+ * Handles escaped backslashes: \\ becomes \
+ * Escapes for control characters < 0x20 other than TAB, LF, and CR are left unconverted.
+ *
+ * @param input String potentially containing Unicode escape sequences
+ * @return String with escape sequences converted to actual Unicode characters
+ */
+I18NUTIL_DLLPUBLIC OUString processUnicodeEscapes(std::u16string_view input);
+
+} // namespace i18nutil
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab cinoptions=b1,g0,N-s cinkeys+=0=break: */

core.git: 3 commits - i18nutil/CppunitTest_i18nutil_unicodeescape.mk i18nutil/Library_i18nutil.mk i18nutil/Module_i18nutil.mk i18nutil/qa i18nutil/source include/i18nutil sc/qa sc/source sw/qa sw/source

Reply via email to