include/tools/inetmime.hxx                                        |   14 +--
 sal/rtl/uri.cxx                                                   |   31 -------
 sal/rtl/ustrbuf.cxx                                               |   12 ---
 sal/textenc/convertbig5hkscs.cxx                                  |   14 +--
 sal/textenc/converteuctw.cxx                                      |   10 +-
 sal/textenc/convertgb18030.cxx                                    |   13 +--
 sal/textenc/convertisciidevangari.cxx                             |    8 +-
 sal/textenc/convertiso2022cn.cxx                                  |   10 +-
 sal/textenc/convertiso2022jp.cxx                                  |    8 +-
 sal/textenc/convertiso2022kr.cxx                                  |    8 +-
 sal/textenc/convertsinglebytetobmpunicode.cxx                     |    8 +-
 sal/textenc/handleundefinedunicodetotextchar.cxx                  |    4 -
 sal/textenc/tcvtutf8.cxx                                          |   13 +--
 sal/textenc/unichars.hxx                                          |   24 ------
 sax/source/expatwrap/saxwriter.cxx                                |   40 +++++-----
 stoc/source/uriproc/UriSchemeParser_vndDOTsunDOTstarDOTscript.cxx |    9 --
 tools/source/inet/inetmime.cxx                                    |   28 -------
 vcl/source/gdi/scrptrun.cxx                                       |   10 +-
 vcl/win/window/salframe.cxx                                       |    5 -
 19 files changed, 87 insertions(+), 182 deletions(-)

New commits:
commit 8b333c76945960fc62a01829666ba234f59a6d94
Author:     Mike Kaganski <mike.kagan...@collabora.com>
AuthorDate: Fri Dec 24 15:25:52 2021 +0300
Commit:     Mike Kaganski <mike.kagan...@collabora.com>
CommitDate: Fri Dec 24 21:42:08 2021 +0100

    Use rtl functions instead of own surrogate checking/combining
    
    Change-Id: I3eb05d8f5b0761bc3b672d4c855eb469f8cc1a29
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/127375
    Tested-by: Jenkins
    Reviewed-by: Mike Kaganski <mike.kagan...@collabora.com>

diff --git a/include/tools/inetmime.hxx b/include/tools/inetmime.hxx
index ce0469fa131a..a3c7de49690c 100644
--- a/include/tools/inetmime.hxx
+++ b/include/tools/inetmime.hxx
@@ -227,16 +227,12 @@ inline int INetMIME::getHexWeight(sal_uInt32 nChar)
 inline sal_uInt32 INetMIME::getUTF32Character(const sal_Unicode *& rBegin,
                                               const sal_Unicode * pEnd)
 {
-    DBG_ASSERT(rBegin && rBegin < pEnd,
+    assert(rBegin && rBegin < pEnd &&
                "INetMIME::getUTF32Character(): Bad sequence");
-    if (rBegin + 1 < pEnd && rBegin[0] >= 0xD800 && rBegin[0] <= 0xDBFF
-        && rBegin[1] >= 0xDC00 && rBegin[1] <= 0xDFFF)
-    {
-        sal_uInt32 nUTF32 = sal_uInt32(*rBegin++ & 0x3FF) << 10;
-        return (nUTF32 | (*rBegin++ & 0x3FF)) + 0x10000;
-    }
-    else
-        return *rBegin++;
+    sal_uInt32 nUTF32 = *rBegin++;
+    if (rBegin < pEnd && rtl::isHighSurrogate(nUTF32) && rtl::isLowSurrogate(rBegin[0]))
+        nUTF32 = rtl::combineSurrogates(nUTF32, *rBegin++);
+    return nUTF32;
 }
 
 
diff --git a/sal/rtl/uri.cxx b/sal/rtl/uri.cxx
index 441a6c69f1e8..0c5479563405 100644
--- a/sal/rtl/uri.cxx
+++ b/sal/rtl/uri.cxx
@@ -219,21 +219,7 @@ namespace {
 
 void writeUcs4(rtl_uString ** pBuffer, sal_Int32 * pCapacity, sal_uInt32 nUtf32)
 {
-    assert(rtl::isUnicodeCodePoint(nUtf32));
-    if (nUtf32 <= 0xFFFF)
-    {
-        writeUnicode(pBuffer, pCapacity, static_cast< sal_Unicode >(nUtf32));
-    }
-    else
-    {
-        nUtf32 -= 0x10000;
-        writeUnicode(
-            pBuffer, pCapacity,
-            static_cast< sal_Unicode >(nUtf32 >> 10 | 0xD800));
-        writeUnicode(
-            pBuffer, pCapacity,
-            static_cast< sal_Unicode >((nUtf32 & 0x3FF) | 0xDC00));
-    }
+    rtl_uStringbuffer_insertUtf32(pBuffer, pCapacity, (*pBuffer)->length, nUtf32);
 }
 
 void writeEscapeOctet(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
@@ -284,20 +270,7 @@ bool writeEscapeChar(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
         rtl_UnicodeToTextConverter aConverter
             = rtl_createUnicodeToTextConverter(eCharset);
         sal_Unicode aSrc[2];
-        sal_Size nSrcSize;
-        if (nUtf32 <= 0xFFFF)
-        {
-            aSrc[0] = static_cast< sal_Unicode >(nUtf32);
-            nSrcSize = 1;
-        }
-        else
-        {
-            aSrc[0] = static_cast< sal_Unicode >(
-                ((nUtf32 - 0x10000) >> 10) | 0xD800);
-            aSrc[1] = static_cast< sal_Unicode >(
-                ((nUtf32 - 0x10000) & 0x3FF) | 0xDC00);
-            nSrcSize = 2;
-        }
+        sal_Size nSrcSize = rtl::splitSurrogates(nUtf32, aSrc);
 
         char aDst[32]; // FIXME  random value
         sal_uInt32 nInfo;
diff --git a/sal/rtl/ustrbuf.cxx b/sal/rtl/ustrbuf.cxx
index 53e178e5881a..7c5b187edbcb 100644
--- a/sal/rtl/ustrbuf.cxx
+++ b/sal/rtl/ustrbuf.cxx
@@ -174,17 +174,7 @@ void rtl_uStringbuffer_insertUtf32(
     SAL_THROW_EXTERN_C()
 {
     sal_Unicode buf[2];
-    sal_Int32 len;
-    OSL_ASSERT(rtl::isUnicodeScalarValue(c));
-    if (c <= 0xFFFF) {
-        buf[0] = static_cast<sal_Unicode>(c);
-        len = 1;
-    } else {
-        c -= 0x10000;
-        buf[0] = static_cast<sal_Unicode>((c >> 10) | 0xD800);
-        buf[1] = static_cast<sal_Unicode>((c & 0x3FF) | 0xDC00);
-        len = 2;
-    }
+    sal_Int32 len = rtl::splitSurrogates(c, buf);
     rtl_uStringbuffer_insert(pThis, capacity, offset, buf, len);
 }
 
diff --git a/sal/textenc/convertbig5hkscs.cxx b/sal/textenc/convertbig5hkscs.cxx
index 77484666982e..bf3a2e2cf59d 100644
--- a/sal/textenc/convertbig5hkscs.cxx
+++ b/sal/textenc/convertbig5hkscs.cxx
@@ -131,7 +131,7 @@ sal_Size ImplConvertBig5HkscsToUnicode(void const * pData,
                         nUnicode = pBig5Data[nRow].mpToUniTrailTab[nChar - n];
                         if (nUnicode == 0)
                             nUnicode = 0xFFFF;
-                        assert(!ImplIsHighSurrogate(nUnicode));
+                        assert(!rtl::isHighSurrogate(nUnicode));
                     }
                 }
                 if (nUnicode == 0xFFFF)
@@ -192,11 +192,11 @@ sal_Size ImplConvertBig5HkscsToUnicode(void const * pData,
                         }
                         ++p;
                     }
-                    assert(!ImplIsHighSurrogate(nUnicode));
+                    assert(!rtl::isHighSurrogate(nUnicode));
                 }
                 if (nUnicode == 0xFFFF)
                     goto bad_input;
-                if (ImplIsHighSurrogate(nUnicode))
+                if (rtl::isHighSurrogate(nUnicode))
                     if (pDestBufEnd - pDestBufPtr >= 2)
                     {
                         nOffset += nLast - nFirst + 1;
@@ -329,19 +329,19 @@ sal_Size ImplConvertUnicodeToBig5Hkscs(void const * pData,
         sal_uInt32 nChar = *pSrcBuf++;
         if (nHighSurrogate == 0)
         {
-            if (ImplIsHighSurrogate(nChar))
+            if (rtl::isHighSurrogate(nChar))
             {
                 nHighSurrogate = static_cast<sal_Unicode>(nChar);
                 continue;
             }
-            else if (ImplIsLowSurrogate(nChar))
+            else if (rtl::isLowSurrogate(nChar))
             {
                 bUndefined = false;
                 goto bad_input;
             }
         }
-        else if (ImplIsLowSurrogate(nChar))
-            nChar = ImplCombineSurrogates(nHighSurrogate, nChar);
+        else if (rtl::isLowSurrogate(nChar))
+            nChar = rtl::combineSurrogates(nHighSurrogate, nChar);
         else
         {
             bUndefined = false;
diff --git a/sal/textenc/converteuctw.cxx b/sal/textenc/converteuctw.cxx
index edb3c07fa934..8684b643c66e 100644
--- a/sal/textenc/converteuctw.cxx
+++ b/sal/textenc/converteuctw.cxx
@@ -206,7 +206,7 @@ sal_Size ImplConvertEucTwToUnicode(void const * pData,
                             = pCns116431992Data[nOffset + (nChar - nFirst)];
                         if (nUnicode == 0xFFFF)
                             goto bad_input;
-                        else if (ImplIsHighSurrogate(nUnicode))
+                        else if (rtl::isHighSurrogate(nUnicode))
                             if (pDestBufEnd - pDestBufPtr >= 2)
                             {
                                 nOffset += nLast - nFirst + 1;
@@ -340,19 +340,19 @@ sal_Size ImplConvertUnicodeToEucTw(void const * pData,
         sal_uInt32 nChar = *pSrcBuf++;
         if (nHighSurrogate == 0)
         {
-            if (ImplIsHighSurrogate(nChar))
+            if (rtl::isHighSurrogate(nChar))
             {
                 nHighSurrogate = static_cast<sal_Unicode>(nChar);
                 continue;
             }
-            else if (ImplIsLowSurrogate(nChar))
+            else if (rtl::isLowSurrogate(nChar))
             {
                 bUndefined = false;
                 goto bad_input;
             }
         }
-        else if (ImplIsLowSurrogate(nChar))
-            nChar = ImplCombineSurrogates(nHighSurrogate, nChar);
+        else if (rtl::isLowSurrogate(nChar))
+            nChar = rtl::combineSurrogates(nHighSurrogate, nChar);
         else
         {
             bUndefined = false;
diff --git a/sal/textenc/convertgb18030.cxx b/sal/textenc/convertgb18030.cxx
index 88f5a999d87d..d7963762fa2b 100644
--- a/sal/textenc/convertgb18030.cxx
+++ b/sal/textenc/convertgb18030.cxx
@@ -172,10 +172,7 @@ sal_Size ImplConvertGb18030ToUnicode(void const * pData,
                     if (pDestBufEnd - pDestBufPtr >= 2)
                     {
                         nCode -= 189000 - 0x10000;
-                        *pDestBufPtr++
-                            = static_cast<sal_Unicode>(ImplGetHighSurrogate(nCode));
-                        *pDestBufPtr++
-                            = static_cast<sal_Unicode>(ImplGetLowSurrogate(nCode));
+                        pDestBufPtr += rtl::splitSurrogates(nCode, pDestBufPtr);
                         startOfCurrentChar = nConverted + 1;
                     }
                     else
@@ -330,19 +327,19 @@ sal_Size ImplConvertUnicodeToGb18030(void const * pData,
         sal_uInt32 nChar = *pSrcBuf++;
         if (nHighSurrogate == 0)
         {
-            if (ImplIsHighSurrogate(nChar))
+            if (rtl::isHighSurrogate(nChar))
             {
                 nHighSurrogate = static_cast<sal_Unicode>(nChar);
                 continue;
             }
-            else if (ImplIsLowSurrogate(nChar))
+            else if (rtl::isLowSurrogate(nChar))
             {
                 bUndefined = false;
                 goto bad_input;
             }
         }
-        else if (ImplIsLowSurrogate(nChar))
-            nChar = ImplCombineSurrogates(nHighSurrogate, nChar);
+        else if (rtl::isLowSurrogate(nChar))
+            nChar = rtl::combineSurrogates(nHighSurrogate, nChar);
         else
         {
             bUndefined = false;
diff --git a/sal/textenc/convertisciidevangari.cxx b/sal/textenc/convertisciidevangari.cxx
index 759328565690..75fcadcf7fe4 100644
--- a/sal/textenc/convertisciidevangari.cxx
+++ b/sal/textenc/convertisciidevangari.cxx
@@ -266,20 +266,20 @@ sal_Size UnicodeToIsciiDevanagari::convert(sal_Unicode const* pSrcBuf, sal_Size
         char cSpecialChar = 0;
         if (cHighSurrogate == 0)
         {
-            if (ImplIsHighSurrogate(c))
+            if (rtl::isHighSurrogate(c))
             {
                 cHighSurrogate = static_cast< sal_Unicode >(c);
                 continue;
             }
-            else if (ImplIsLowSurrogate(c))
+            else if (rtl::isLowSurrogate(c))
             {
                 bUndefined = false;
                 goto bad_input;
             }
         }
-        else if (ImplIsLowSurrogate(c))
+        else if (rtl::isLowSurrogate(c))
         {
-            c = ImplCombineSurrogates(cHighSurrogate, c);
+            c = rtl::combineSurrogates(cHighSurrogate, c);
         }
         else
         {
diff --git a/sal/textenc/convertiso2022cn.cxx b/sal/textenc/convertiso2022cn.cxx
index 9e89c27486db..60c2b57436ff 100644
--- a/sal/textenc/convertiso2022cn.cxx
+++ b/sal/textenc/convertiso2022cn.cxx
@@ -329,7 +329,7 @@ sal_Size ImplConvertIso2022CnToUnicode(void const * pData,
                             = pCns116431992Data[nOffset + (nChar - nFirst)];
                         if (nUnicode == 0xFFFF)
                             goto bad_input;
-                        else if (ImplIsHighSurrogate(nUnicode))
+                        else if (rtl::isHighSurrogate(nUnicode))
                             if (pDestBufEnd - pDestBufPtr >= 2)
                             {
                                 nOffset += nLast - nFirst + 1;
@@ -556,19 +556,19 @@ sal_Size ImplConvertUnicodeToIso2022Cn(void const * pData,
         sal_uInt32 nChar = *pSrcBuf++;
         if (nHighSurrogate == 0)
         {
-            if (ImplIsHighSurrogate(nChar))
+            if (rtl::isHighSurrogate(nChar))
             {
                 nHighSurrogate = static_cast<sal_Unicode>(nChar);
                 continue;
             }
-            else if (ImplIsLowSurrogate(nChar))
+            else if (rtl::isLowSurrogate(nChar))
             {
                 bUndefined = false;
                 goto bad_input;
             }
         }
-        else if (ImplIsLowSurrogate(nChar))
-            nChar = ImplCombineSurrogates(nHighSurrogate, nChar);
+        else if (rtl::isLowSurrogate(nChar))
+            nChar = rtl::combineSurrogates(nHighSurrogate, nChar);
         else
         {
             bUndefined = false;
diff --git a/sal/textenc/convertiso2022jp.cxx b/sal/textenc/convertiso2022jp.cxx
index 4024653f04e5..935bc8515d77 100644
--- a/sal/textenc/convertiso2022jp.cxx
+++ b/sal/textenc/convertiso2022jp.cxx
@@ -375,19 +375,19 @@ sal_Size ImplConvertUnicodeToIso2022Jp(void const * pData,
         sal_uInt32 nChar = *pSrcBuf++;
         if (nHighSurrogate == 0)
         {
-            if (ImplIsHighSurrogate(nChar))
+            if (rtl::isHighSurrogate(nChar))
             {
                 nHighSurrogate = static_cast<sal_Unicode>(nChar);
                 continue;
             }
-            else if (ImplIsLowSurrogate(nChar))
+            else if (rtl::isLowSurrogate(nChar))
             {
                 bUndefined = false;
                 goto bad_input;
             }
         }
-        else if (ImplIsLowSurrogate(nChar))
-            nChar = ImplCombineSurrogates(nHighSurrogate, nChar);
+        else if (rtl::isLowSurrogate(nChar))
+            nChar = rtl::combineSurrogates(nHighSurrogate, nChar);
         else
         {
             bUndefined = false;
diff --git a/sal/textenc/convertiso2022kr.cxx b/sal/textenc/convertiso2022kr.cxx
index 5c7971ba5af6..5aea5c66c877 100644
--- a/sal/textenc/convertiso2022kr.cxx
+++ b/sal/textenc/convertiso2022kr.cxx
@@ -353,19 +353,19 @@ sal_Size ImplConvertUnicodeToIso2022Kr(void const * pData,
             sal_uInt32 nChar = *pSrcBuf++;
             if (nHighSurrogate == 0)
             {
-                if (ImplIsHighSurrogate(nChar))
+                if (rtl::isHighSurrogate(nChar))
                 {
                     nHighSurrogate = static_cast<sal_Unicode>(nChar);
                     continue;
                 }
-                else if (ImplIsLowSurrogate(nChar))
+                else if (rtl::isLowSurrogate(nChar))
                 {
                     bUndefined = false;
                     goto bad_input;
                 }
             }
-            else if (ImplIsLowSurrogate(nChar))
-                nChar = ImplCombineSurrogates(nHighSurrogate, nChar);
+            else if (rtl::isLowSurrogate(nChar))
+                nChar = rtl::combineSurrogates(nHighSurrogate, nChar);
             else
             {
                 bUndefined = false;
diff --git a/sal/textenc/convertsinglebytetobmpunicode.cxx b/sal/textenc/convertsinglebytetobmpunicode.cxx
index b948ba0fbc5c..43c2b9cf39ce 100644
--- a/sal/textenc/convertsinglebytetobmpunicode.cxx
+++ b/sal/textenc/convertsinglebytetobmpunicode.cxx
@@ -111,17 +111,17 @@ sal_Size rtl_textenc_convertBmpUnicodeToSingleByte(
         bool undefined = true;
         sal_uInt32 c = *srcBuf++;
         if (highSurrogate == 0) {
-            if (ImplIsHighSurrogate(c)) {
+            if (rtl::isHighSurrogate(c)) {
                 highSurrogate = static_cast< sal_Unicode >(c);
                 continue;
             }
-            else if (ImplIsLowSurrogate(c))
+            else if (rtl::isLowSurrogate(c))
             {
                 undefined = false;
                 goto bad_input;
             }
-        } else if (ImplIsLowSurrogate(c)) {
-            c = ImplCombineSurrogates(highSurrogate, c);
+        } else if (rtl::isLowSurrogate(c)) {
+            c = rtl::combineSurrogates(highSurrogate, c);
         } else {
             undefined = false;
             goto bad_input;
diff --git a/sal/textenc/handleundefinedunicodetotextchar.cxx b/sal/textenc/handleundefinedunicodetotextchar.cxx
index 320562495e97..76aed03e1881 100644
--- a/sal/textenc/handleundefinedunicodetotextchar.cxx
+++ b/sal/textenc/handleundefinedunicodetotextchar.cxx
@@ -106,7 +106,7 @@ bool sal::detail::textenc::handleUndefinedUnicodeToTextChar(
 
     /* Surrogates Characters should result in */
     /* one replacement character */
-    if (ImplIsHighSurrogate(c))
+    if (rtl::isHighSurrogate(c))
     {
         if ( ((*ppSrcBuf) + 1) == pEndSrcBuf )
         {
@@ -115,7 +115,7 @@ bool sal::detail::textenc::handleUndefinedUnicodeToTextChar(
         }
 
         c = *((*ppSrcBuf)+1);
-        if (ImplIsLowSurrogate(c))
+        if (rtl::isLowSurrogate(c))
             (*ppSrcBuf)++;
         else
         {
diff --git a/sal/textenc/tcvtutf8.cxx b/sal/textenc/tcvtutf8.cxx
index ca29156c418f..f210b654d57f 100644
--- a/sal/textenc/tcvtutf8.cxx
+++ b/sal/textenc/tcvtutf8.cxx
@@ -199,10 +199,7 @@ sal_Size ImplConvertUtf8ToUnicode(
                 else
                     goto no_output;
             else if (pDestBufEnd - pDestBufPtr >= 2)
-            {
-                *pDestBufPtr++ = static_cast<sal_Unicode>(ImplGetHighSurrogate(nUtf32));
-                *pDestBufPtr++ = static_cast<sal_Unicode>(ImplGetLowSurrogate(nUtf32));
-            }
+                pDestBufPtr += rtl::splitSurrogates(nUtf32, pDestBufPtr);
             else
                 goto no_output;
         }
@@ -349,18 +346,18 @@ sal_Size ImplConvertUnicodeToUtf8(
         sal_uInt32 nChar = *pSrcBufPtr++;
         if (nHighSurrogate == 0)
         {
-            if (ImplIsHighSurrogate(nChar) && !bJavaUtf8)
+            if (rtl::isHighSurrogate(nChar) && !bJavaUtf8)
             {
                 nHighSurrogate = static_cast<sal_Unicode>(nChar);
                 continue;
             }
-            else if (ImplIsLowSurrogate(nChar) && !bJavaUtf8)
+            else if (rtl::isLowSurrogate(nChar) && !bJavaUtf8)
             {
                 goto bad_input;
             }
         }
-        else if (ImplIsLowSurrogate(nChar) && !bJavaUtf8)
-            nChar = ImplCombineSurrogates(nHighSurrogate, nChar);
+        else if (rtl::isLowSurrogate(nChar) && !bJavaUtf8)
+            nChar = rtl::combineSurrogates(nHighSurrogate, nChar);
         else
             goto bad_input;
 
diff --git a/sal/textenc/unichars.hxx b/sal/textenc/unichars.hxx
index 8ca1021da005..e627d3c95c3e 100644
--- a/sal/textenc/unichars.hxx
+++ b/sal/textenc/unichars.hxx
@@ -31,34 +31,10 @@
 
 bool ImplIsControlOrFormat(sal_uInt32 nUtf32);
 
-// All code points that are high-surrogates, as of Unicode 3.1.1.
-inline bool ImplIsHighSurrogate(sal_uInt32 nUtf32) { return nUtf32 >= 0xD800 && nUtf32 <= 0xDBFF; }
-
-// All code points that are low-surrogates, as of Unicode 3.1.1.
-inline bool ImplIsLowSurrogate(sal_uInt32 nUtf32) { return nUtf32 >= 0xDC00 && nUtf32 <= 0xDFFF; }
-
 bool ImplIsPrivateUse(sal_uInt32 nUtf32);
 
 bool ImplIsZeroWidth(sal_uInt32 nUtf32);
 
-inline sal_uInt32 ImplGetHighSurrogate(sal_uInt32 nUtf32)
-{
-    assert(nUtf32 >= 0x10000);
-    return ((nUtf32 - 0x10000) >> 10) | 0xD800;
-}
-
-inline sal_uInt32 ImplGetLowSurrogate(sal_uInt32 nUtf32)
-{
-    assert(nUtf32 >= 0x10000);
-    return ((nUtf32 - 0x10000) & 0x3FF) | 0xDC00;
-}
-
-inline sal_uInt32 ImplCombineSurrogates(sal_uInt32 nHigh, sal_uInt32 nLow)
-{
-    assert(ImplIsHighSurrogate(nHigh) && ImplIsLowSurrogate(nLow));
-    return (((nHigh & 0x3FF) << 10) | (nLow & 0x3FF)) + 0x10000;
-}
-
 #endif
 
 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sax/source/expatwrap/saxwriter.cxx b/sax/source/expatwrap/saxwriter.cxx
index 37eb58f099db..e19a31211d98 100644
--- a/sax/source/expatwrap/saxwriter.cxx
+++ b/sax/source/expatwrap/saxwriter.cxx
@@ -447,20 +447,22 @@ bool SaxWriterHelper::convertToXML(const sal_Unicode* pStr, sal_Int32 nStrLen,
             }
 
             // Deal with other unicode cases
-            if (c >= 0xd800 && c < 0xdc00)
+            if (rtl::isHighSurrogate(c))
             {
                 // 1. surrogate: save (until 2. surrogate)
-                OSL_ENSURE(nSurrogate == 0, "left-over Unicode surrogate");
-                nSurrogate = ((c & 0x03ff) + 0x0040);
+                if (nSurrogate != 0) // left-over lone 1st Unicode surrogate
+                {
+                    OSL_FAIL("left-over Unicode surrogate");
+                    bRet = false;
+                }
+                nSurrogate = c;
             }
-            else if (c >= 0xdc00 && c < 0xe000)
+            else if (rtl::isLowSurrogate(c))
             {
                 // 2. surrogate: write as UTF-8
-                OSL_ENSURE(nSurrogate != 0, "lone 2nd Unicode surrogate");
-
-                nSurrogate = (nSurrogate << 10) | (c & 0x03ff);
-                if (rtl::isUnicodeScalarValue(nSurrogate) && nSurrogate >= 0x00010000)
+                if (nSurrogate) // can only be 1st surrogate
                 {
+                    nSurrogate = rtl::combineSurrogates(nSurrogate, c);
                     sal_Int8 aBytes[] = { sal_Int8(0xF0 | ((nSurrogate >> 18) & 0x0F)),
                                           sal_Int8(0x80 | ((nSurrogate >> 12) & 0x3F)),
                                           sal_Int8(0x80 | ((nSurrogate >> 6) & 0x3F)),
@@ -479,7 +481,7 @@ bool SaxWriterHelper::convertToXML(const sal_Unicode* pStr, sal_Int32 nStrLen,
                         rPos++;
                     }
                 }
-                else
+                else // lone 2nd surrogate
                 {
                     OSL_FAIL("illegal Unicode character");
                     bRet = false;
@@ -526,13 +528,18 @@ bool SaxWriterHelper::convertToXML(const sal_Unicode* pStr, sal_Int32 nStrLen,
             rPos = writeSequence();
 
         // reset left-over surrogate
-        if ((nSurrogate != 0) && (c < 0xd800 || c >= 0xdc00))
+        if ((nSurrogate != 0) && !rtl::isHighSurrogate(c))
         {
-            OSL_ENSURE(nSurrogate != 0, "left-over Unicode surrogate");
+            OSL_FAIL("left-over Unicode surrogate");
             nSurrogate = 0;
             bRet = false;
         }
     }
+    if (nSurrogate != 0) // trailing lone 1st surrogate
+    {
+        OSL_FAIL("left-over Unicode surrogate");
+        bRet = false;
+    }
     return bRet;
 }
 
@@ -951,16 +958,15 @@ sal_Int32 SaxWriterHelper::calcXMLByteLength(const OUString& rStr, bool bDoNorma
             }
 
             // Deal with other unicode cases
-            if (c >= 0xd800 && c < 0xdc00)
+            if (rtl::isHighSurrogate(c))
             {
                 // save surrogate
-                nSurrogate = ((c & 0x03ff) + 0x0040);
+                nSurrogate = c;
             }
-            else if (c >= 0xdc00 && c < 0xe000)
+            else if (rtl::isLowSurrogate(c))
             {
                 // 2. surrogate: write as UTF-8 (if range is OK
-                nSurrogate = (nSurrogate << 10) | (c & 0x03ff);
-                if (rtl::isUnicodeScalarValue(nSurrogate) && nSurrogate >= 0x00010000)
+                if (nSurrogate)
                     nOutputLength += 4;
                 nSurrogate = 0;
             }
@@ -975,7 +981,7 @@ sal_Int32 SaxWriterHelper::calcXMLByteLength(const OUString& rStr, bool bDoNorma
         }
 
         // surrogate processing
-        if ((nSurrogate != 0) && (c < 0xd800 || c >= 0xdc00))
+        if ((nSurrogate != 0) && !rtl::isHighSurrogate(c))
             nSurrogate = 0;
     }
 
diff --git a/stoc/source/uriproc/UriSchemeParser_vndDOTsunDOTstarDOTscript.cxx b/stoc/source/uriproc/UriSchemeParser_vndDOTsunDOTstarDOTscript.cxx
index 1614f44b2f89..054c6ca3d76c 100644
--- a/stoc/source/uriproc/UriSchemeParser_vndDOTsunDOTstarDOTscript.cxx
+++ b/stoc/source/uriproc/UriSchemeParser_vndDOTsunDOTstarDOTscript.cxx
@@ -115,14 +115,7 @@ OUString parsePart(
                 {
                     break;
                 }
-                if (encoded <= 0xFFFF) {
-                    buf.append(static_cast< sal_Unicode >(encoded));
-                } else {
-                    buf.append(static_cast< sal_Unicode >(
-                        (encoded >> 10) | 0xD800));
-                    buf.append(static_cast< sal_Unicode >(
-                        (encoded & 0x3FF) | 0xDC00));
-                }
+                buf.appendUtf32(encoded);
             } else {
                 break;
             }
diff --git a/tools/source/inet/inetmime.cxx b/tools/source/inet/inetmime.cxx
index f7265523b6e2..85f03cfce3e2 100644
--- a/tools/source/inet/inetmime.cxx
+++ b/tools/source/inet/inetmime.cxx
@@ -131,30 +131,6 @@ std::unique_ptr<sal_Unicode[]> convertToUnicode(const char * pBegin,
     return pBuffer;
 }
 
-/** Put the UTF-16 encoding of a UTF-32 character into a buffer.
-
-    @param pBuffer  Points to a buffer, must not be null.
-
-    @param nUTF32  A UTF-32 character, must be in the range 0..0x10FFFF.
-
-    @return  A pointer past the UTF-16 characters put into the buffer
-    (i.e., pBuffer + 1 or pBuffer + 2).
- */
-sal_Unicode * putUTF32Character(sal_Unicode * pBuffer,
-                                                 sal_uInt32 nUTF32)
-{
-    DBG_ASSERT(rtl::isUnicodeCodePoint(nUTF32), "putUTF32Character(): Bad char");
-    if (nUTF32 < 0x10000)
-        *pBuffer++ = sal_Unicode(nUTF32);
-    else
-    {
-        nUTF32 -= 0x10000;
-        *pBuffer++ = sal_Unicode(0xD800 | (nUTF32 >> 10));
-        *pBuffer++ = sal_Unicode(0xDC00 | (nUTF32 & 0x3FF));
-    }
-    return pBuffer;
-}
-
 void writeUTF8(OStringBuffer & rSink, sal_uInt32 nChar)
 {
     // See RFC 2279 for a discussion of UTF-8.
@@ -1386,9 +1362,7 @@ OUString INetMIME::decodeHeaderFieldBody(const OString& rBody)
                 if (translateUTF8Char(pUTF8End, pEnd, nCharacter))
                 {
                     appendISO88591(sDecoded, pCopyBegin, p - 1);
-                    sal_Unicode aUTF16Buf[2];
-                    sal_Int32 nUTF16Len = putUTF32Character(aUTF16Buf, nCharacter) - aUTF16Buf;
-                    sDecoded.append(aUTF16Buf, nUTF16Len);
+                    sDecoded.appendUtf32(nCharacter);
                     p = pUTF8End;
                     pCopyBegin = p;
                 }
diff --git a/vcl/source/gdi/scrptrun.cxx b/vcl/source/gdi/scrptrun.cxx
index c0da5c5322ed..fbc68ca41bf1 100644
--- a/vcl/source/gdi/scrptrun.cxx
+++ b/vcl/source/gdi/scrptrun.cxx
@@ -37,6 +37,10 @@
   * This file is largely copied from the ICU project,
   * under folder source/extra/scrptrun/scrptrun.cpp
   */
+
+#include <sal/config.h>
+
+#include <rtl/character.hxx>
 #include <unicode/utypes.h>
 #include <unicode/uscript.h>
 
@@ -160,14 +164,14 @@ UBool ScriptRun::next()
 
         // if the character is a high surrogate and it's not the last one
         // in the text, see if it's followed by a low surrogate
-        if (high >= 0xD800 && high <= 0xDBFF && scriptEnd < charLimit - 1)
+        if (rtl::isHighSurrogate(high) && scriptEnd < charLimit - 1)
         {
             UChar low = charArray[scriptEnd + 1];
 
             // if it is followed by a low surrogate,
             // consume it and form the full character
-            if (low >= 0xDC00 && low <= 0xDFFF) {
-                ch = (high - 0xD800) * 0x0400 + low - 0xDC00 + 0x10000;
+            if (rtl::isLowSurrogate(low)) {
+                ch = rtl::combineSurrogates(high, low);
                 scriptEnd += 1;
             }
         }
diff --git a/vcl/win/window/salframe.cxx b/vcl/win/window/salframe.cxx
index 6593b2fbebb6..95b9a36693ad 100644
--- a/vcl/win/window/salframe.cxx
+++ b/vcl/win/window/salframe.cxx
@@ -38,6 +38,7 @@
 #include <osl/file.hxx>
 #include <osl/process.h>
 
+#include <rtl/character.hxx>
 #include <rtl/string.h>
 #include <rtl/ustring.h>
 #include <sal/log.hxx>
@@ -122,8 +123,6 @@ const unsigned int WM_USER_SYSTEM_WINDOW_ACTIVATED = RegisterWindowMessageW(L"SY
 bool WinSalFrame::mbInReparent = false;
 
 // Macros for support of WM_UNICHAR & Keyman 6.0
-//#define Uni_UTF32ToSurrogate1(ch)   (((unsigned long) (ch) - 0x10000) / 0x400 + 0xD800)
-#define Uni_UTF32ToSurrogate2(ch)   ((static_cast<tools::ULong>(ch) - 0x10000) % 0x400 + 0xDC00)
 #define Uni_SupplementaryPlanesStart    0x10000
 
 static void UpdateFrameGeometry( HWND hWnd, WinSalFrame* pFrame );
@@ -3472,7 +3471,7 @@ static bool ImplHandleKeyMsg( HWND hWnd, UINT nMsg,
              nLastVKChar = 0;
              pFrame->CallCallback( SalEvent::KeyInput, &aKeyEvt );
              pFrame->CallCallback( SalEvent::KeyUp, &aKeyEvt );
-             wParam = static_cast<sal_Unicode>(Uni_UTF32ToSurrogate2( wParam ));
+             wParam = rtl::getLowSurrogate( wParam );
         }
 
         aKeyEvt.mnCharCode = static_cast<sal_Unicode>(wParam);

Reply via email to