comphelper/source/misc/string.cxx |    2 
 vcl/source/gdi/sallayout.cxx      |   84 ++++++++++++++++++--------------------
 2 files changed, 41 insertions(+), 45 deletions(-)

New commits:
commit d4c463c04bcd3716bf4c61d387e853cdd9859a81
Author:     Neil Roberts <[email protected]>
AuthorDate: Wed Nov 5 12:41:34 2025 +0100
Commit:     Noel Grandin <[email protected]>
CommitDate: Thu Nov 6 08:19:10 2025 +0100

    Shortcut LocalizeDigitsInString if locale already uses arabic digits
    
    Instead of calling GetLocalizedChar for every character in the string to
    be localized, we now just call a replacement function that returns the
    offset to apply for digits. That means that in the likely case that the
    offset is zero we can skip scanning the string altogether and just
    return the original one.
    
    Change-Id: I8b1ffa3ba38d5a8ed491668c7c1a242327bce626
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/193462
    Reviewed-by: Noel Grandin <[email protected]>
    Tested-by: Jenkins

diff --git a/comphelper/source/misc/string.cxx b/comphelper/source/misc/string.cxx
index 8b72ad5f34a5..a472cf57d1e8 100644
--- a/comphelper/source/misc/string.cxx
+++ b/comphelper/source/misc/string.cxx
@@ -474,7 +474,7 @@ sal_Int32 compareNatural( const OUString & rLHS, const OUString & rRHS,
         nRHSChunkLen = nRHSLastNonDigitPos - nRHSFirstDigitPos;
 
         //To-Do: Possibly scale down those unicode codepoints that relate to
-        //numbers outside of the normal 0-9 range, e.g. see GetLocalizedChar in
+        //numbers outside of the normal 0-9 range, e.g. see LocalizeDigitsInString in
         //vcl
 
         sal_uInt32 nLHS = comphelper::string::decimalStringToNumber(rLHS.subView(nLHSFirstDigitPos, nLHSChunkLen));
diff --git a/vcl/source/gdi/sallayout.cxx b/vcl/source/gdi/sallayout.cxx
index 67b52fba8d39..5f6b0330dd7a 100644
--- a/vcl/source/gdi/sallayout.cxx
+++ b/vcl/source/gdi/sallayout.cxx
@@ -50,77 +50,67 @@
 namespace
 {
 
-sal_UCS4 GetLocalizedChar( sal_UCS4 nChar, LanguageType eLang )
+int GetLocalizedDigitOffset( LanguageType eLang )
 {
-    // currently only conversion from ASCII digits is interesting
-    if( (nChar < '0') || ('9' < nChar) )
-        return nChar;
-
-    int nOffset;
     // eLang & LANGUAGE_MASK_PRIMARY catches language independent of region.
     // CAVEAT! To some like Mongolian MS assigned the same primary language
     // although the script type is different!
     LanguageType pri = primary(eLang);
     if( pri == primary(LANGUAGE_ARABIC_SAUDI_ARABIA) )
-        nOffset = 0x0660 - '0';  // arabic-indic digits
+        return 0x0660 - '0';  // arabic-indic digits
     else if ( pri.anyOf(
         primary(LANGUAGE_FARSI),
         primary(LANGUAGE_URDU_PAKISTAN),
         primary(LANGUAGE_PUNJABI), //???
         primary(LANGUAGE_SINDHI)))
-        nOffset = 0x06F0 - '0';  // eastern arabic-indic digits
+        return 0x06F0 - '0';  // eastern arabic-indic digits
     else if ( pri == primary(LANGUAGE_BENGALI) )
-        nOffset = 0x09E6 - '0';  // bengali
+        return 0x09E6 - '0';  // bengali
     else if ( pri == primary(LANGUAGE_HINDI) )
-        nOffset = 0x0966 - '0';  // devanagari
+        return 0x0966 - '0';  // devanagari
     else if ( pri.anyOf(
         primary(LANGUAGE_AMHARIC_ETHIOPIA),
         primary(LANGUAGE_TIGRIGNA_ETHIOPIA)))
         // TODO case:
-        nOffset = 0x1369 - '0';  // ethiopic
+        return 0x1369 - '0';  // ethiopic
     else if ( pri == primary(LANGUAGE_GUJARATI) )
-        nOffset = 0x0AE6 - '0';  // gujarati
+        return 0x0AE6 - '0';  // gujarati
 #ifdef LANGUAGE_GURMUKHI // TODO case:
     else if ( pri == primary(LANGUAGE_GURMUKHI) )
-        nOffset = 0x0A66 - '0';  // gurmukhi
+        return 0x0A66 - '0';  // gurmukhi
 #endif
     else if ( pri == primary(LANGUAGE_KANNADA) )
-        nOffset = 0x0CE6 - '0';  // kannada
+        return 0x0CE6 - '0';  // kannada
     else if ( pri == primary(LANGUAGE_KHMER))
-        nOffset = 0x17E0 - '0';  // khmer
+        return 0x17E0 - '0';  // khmer
     else if ( pri == primary(LANGUAGE_LAO) )
-        nOffset = 0x0ED0 - '0';  // lao
+        return 0x0ED0 - '0';  // lao
     else if ( pri == primary(LANGUAGE_MALAYALAM) )
-        nOffset = 0x0D66 - '0';  // malayalam
+        return 0x0D66 - '0';  // malayalam
     else if ( pri == primary(LANGUAGE_MONGOLIAN_MONGOLIAN_LSO))
     {
         if (eLang.anyOf(
              LANGUAGE_MONGOLIAN_MONGOLIAN_MONGOLIA,
              LANGUAGE_MONGOLIAN_MONGOLIAN_CHINA,
              LANGUAGE_MONGOLIAN_MONGOLIAN_LSO))
-                nOffset = 0x1810 - '0';   // mongolian
+                return 0x1810 - '0';   // mongolian
         else
-                nOffset = 0;              // mongolian cyrillic
+                return 0;              // mongolian cyrillic
     }
     else if ( pri == primary(LANGUAGE_BURMESE) )
-        nOffset = 0x1040 - '0';  // myanmar
+        return 0x1040 - '0';  // myanmar
     else if ( pri == primary(LANGUAGE_ODIA) )
-        nOffset = 0x0B66 - '0';  // odia
+        return 0x0B66 - '0';  // odia
     else if ( pri == primary(LANGUAGE_TAMIL) )
-        nOffset = 0x0BE7 - '0';  // tamil
+        return 0x0BE7 - '0';  // tamil
     else if ( pri == primary(LANGUAGE_TELUGU) )
-        nOffset = 0x0C66 - '0';  // telugu
+        return 0x0C66 - '0';  // telugu
     else if ( pri == primary(LANGUAGE_THAI) )
-        nOffset = 0x0E50 - '0';  // thai
+        return 0x0E50 - '0';  // thai
     else if ( pri == primary(LANGUAGE_TIBETAN) )
-        nOffset = 0x0F20 - '0';  // tibetan
+        return 0x0F20 - '0';  // tibetan
     else
-    {
-        nOffset = 0;
-    }
-
-    nChar += nOffset;
-    return nChar;
+        return 0;
 }
 
 }
@@ -128,36 +118,42 @@ sal_UCS4 GetLocalizedChar( sal_UCS4 nChar, LanguageType eLang )
 OUString LocalizeDigitsInString( const OUString& sStr, LanguageType eTextLanguage,
                                  sal_Int32 nStart, sal_Int32& nLen )
 {
-    sal_Int32 nextPos, nEnd = nStart + nLen;
+    int digitOffset = GetLocalizedDigitOffset(eTextLanguage);
 
-    for (sal_Int32 i = nStart; i < nEnd; i = nextPos)
-    {
-        nextPos = i;
-        sal_uInt32 nChar = sStr.iterateCodePoints(&nextPos);
+    // If we’re already using arabic digits then we can shortcut the function just return the
+    // original string
+    if (digitOffset == 0)
+        return sStr;
+
+    sal_Int32 nEnd = nStart + nLen;
 
-        sal_UCS4 nReplacementChar = GetLocalizedChar(nChar, eTextLanguage);
+    for (sal_Int32 i = nStart; i < nEnd; ++i)
+    {
+        sal_Unicode nChar = sStr[i];
 
         // The first time we encounter a character that needs to change we’ll make a copy of the
         // string so we can return a new modified one
-        if (nReplacementChar != nChar)
+        if (nChar >= '0' && nChar <= '9')
         {
             // The new string is very likely to have the same length as the old one
             OUStringBuffer xTmpStr(sStr.getLength());
             xTmpStr.append(sStr.subView(0, i));
-            xTmpStr.appendUtf32(nReplacementChar);
 
             // Convert the remainder of the range
-            for (i = nextPos; i < nEnd;)
+            for (; i < nEnd; ++i)
             {
-                nReplacementChar = GetLocalizedChar(sStr.iterateCodePoints(&i), eTextLanguage);
-                xTmpStr.appendUtf32(nReplacementChar);
+                nChar = sStr[i];
+                if (nChar >= '0' && nChar <= '9')
+                    xTmpStr.appendUtf32(nChar + digitOffset);
+                else
+                    xTmpStr.append(nChar);
             }
 
             // Add the rest of the string outside of the range
             xTmpStr.append(sStr.subView(nEnd));
 
-            // The length of the string might have changed if GetLocalizedChar converts between BMP
-            // and surrogate pairs
+            // The length of the string might have changed if the offset makes the character need
+            // surrogate pairs
             nLen += xTmpStr.getLength() - sStr.getLength();
 
             return xTmpStr.makeStringAndClear();

Reply via email to