 comphelper/source/misc/string.cxx |    2
 vcl/source/gdi/sallayout.cxx      |   84 ++++++++++++++++++--------------------
 2 files changed, 41 insertions(+), 45 deletions(-)
New commits: commit d4c463c04bcd3716bf4c61d387e853cdd9859a81 Author: Neil Roberts <[email protected]> AuthorDate: Wed Nov 5 12:41:34 2025 +0100 Commit: Noel Grandin <[email protected]> CommitDate: Thu Nov 6 08:19:10 2025 +0100 Shortcut LocalizeDigitsInString if locale already uses arabic digits Instead of calling GetLocalizedChar for every character in the string to be localized, we now just call a replacement function that returns the offset to apply for digits. That means that in the likely case that the offset is zero we can skip scanning the string altogether and just return the original one. Change-Id: I8b1ffa3ba38d5a8ed491668c7c1a242327bce626 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/193462 Reviewed-by: Noel Grandin <[email protected]> Tested-by: Jenkins diff --git a/comphelper/source/misc/string.cxx b/comphelper/source/misc/string.cxx index 8b72ad5f34a5..a472cf57d1e8 100644 --- a/comphelper/source/misc/string.cxx +++ b/comphelper/source/misc/string.cxx @@ -474,7 +474,7 @@ sal_Int32 compareNatural( const OUString & rLHS, const OUString & rRHS, nRHSChunkLen = nRHSLastNonDigitPos - nRHSFirstDigitPos; //To-Do: Possibly scale down those unicode codepoints that relate to - //numbers outside of the normal 0-9 range, e.g. see GetLocalizedChar in + //numbers outside of the normal 0-9 range, e.g. 
see LocalizeDigitsInString in //vcl sal_uInt32 nLHS = comphelper::string::decimalStringToNumber(rLHS.subView(nLHSFirstDigitPos, nLHSChunkLen)); diff --git a/vcl/source/gdi/sallayout.cxx b/vcl/source/gdi/sallayout.cxx index 67b52fba8d39..5f6b0330dd7a 100644 --- a/vcl/source/gdi/sallayout.cxx +++ b/vcl/source/gdi/sallayout.cxx @@ -50,77 +50,67 @@ namespace { -sal_UCS4 GetLocalizedChar( sal_UCS4 nChar, LanguageType eLang ) +int GetLocalizedDigitOffset( LanguageType eLang ) { - // currently only conversion from ASCII digits is interesting - if( (nChar < '0') || ('9' < nChar) ) - return nChar; - - int nOffset; // eLang & LANGUAGE_MASK_PRIMARY catches language independent of region. // CAVEAT! To some like Mongolian MS assigned the same primary language // although the script type is different! LanguageType pri = primary(eLang); if( pri == primary(LANGUAGE_ARABIC_SAUDI_ARABIA) ) - nOffset = 0x0660 - '0'; // arabic-indic digits + return 0x0660 - '0'; // arabic-indic digits else if ( pri.anyOf( primary(LANGUAGE_FARSI), primary(LANGUAGE_URDU_PAKISTAN), primary(LANGUAGE_PUNJABI), //??? 
primary(LANGUAGE_SINDHI))) - nOffset = 0x06F0 - '0'; // eastern arabic-indic digits + return 0x06F0 - '0'; // eastern arabic-indic digits else if ( pri == primary(LANGUAGE_BENGALI) ) - nOffset = 0x09E6 - '0'; // bengali + return 0x09E6 - '0'; // bengali else if ( pri == primary(LANGUAGE_HINDI) ) - nOffset = 0x0966 - '0'; // devanagari + return 0x0966 - '0'; // devanagari else if ( pri.anyOf( primary(LANGUAGE_AMHARIC_ETHIOPIA), primary(LANGUAGE_TIGRIGNA_ETHIOPIA))) // TODO case: - nOffset = 0x1369 - '0'; // ethiopic + return 0x1369 - '0'; // ethiopic else if ( pri == primary(LANGUAGE_GUJARATI) ) - nOffset = 0x0AE6 - '0'; // gujarati + return 0x0AE6 - '0'; // gujarati #ifdef LANGUAGE_GURMUKHI // TODO case: else if ( pri == primary(LANGUAGE_GURMUKHI) ) - nOffset = 0x0A66 - '0'; // gurmukhi + return 0x0A66 - '0'; // gurmukhi #endif else if ( pri == primary(LANGUAGE_KANNADA) ) - nOffset = 0x0CE6 - '0'; // kannada + return 0x0CE6 - '0'; // kannada else if ( pri == primary(LANGUAGE_KHMER)) - nOffset = 0x17E0 - '0'; // khmer + return 0x17E0 - '0'; // khmer else if ( pri == primary(LANGUAGE_LAO) ) - nOffset = 0x0ED0 - '0'; // lao + return 0x0ED0 - '0'; // lao else if ( pri == primary(LANGUAGE_MALAYALAM) ) - nOffset = 0x0D66 - '0'; // malayalam + return 0x0D66 - '0'; // malayalam else if ( pri == primary(LANGUAGE_MONGOLIAN_MONGOLIAN_LSO)) { if (eLang.anyOf( LANGUAGE_MONGOLIAN_MONGOLIAN_MONGOLIA, LANGUAGE_MONGOLIAN_MONGOLIAN_CHINA, LANGUAGE_MONGOLIAN_MONGOLIAN_LSO)) - nOffset = 0x1810 - '0'; // mongolian + return 0x1810 - '0'; // mongolian else - nOffset = 0; // mongolian cyrillic + return 0; // mongolian cyrillic } else if ( pri == primary(LANGUAGE_BURMESE) ) - nOffset = 0x1040 - '0'; // myanmar + return 0x1040 - '0'; // myanmar else if ( pri == primary(LANGUAGE_ODIA) ) - nOffset = 0x0B66 - '0'; // odia + return 0x0B66 - '0'; // odia else if ( pri == primary(LANGUAGE_TAMIL) ) - nOffset = 0x0BE7 - '0'; // tamil + return 0x0BE7 - '0'; // tamil else if ( pri == 
primary(LANGUAGE_TELUGU) ) - nOffset = 0x0C66 - '0'; // telugu + return 0x0C66 - '0'; // telugu else if ( pri == primary(LANGUAGE_THAI) ) - nOffset = 0x0E50 - '0'; // thai + return 0x0E50 - '0'; // thai else if ( pri == primary(LANGUAGE_TIBETAN) ) - nOffset = 0x0F20 - '0'; // tibetan + return 0x0F20 - '0'; // tibetan else - { - nOffset = 0; - } - - nChar += nOffset; - return nChar; + return 0; } } @@ -128,36 +118,42 @@ sal_UCS4 GetLocalizedChar( sal_UCS4 nChar, LanguageType eLang ) OUString LocalizeDigitsInString( const OUString& sStr, LanguageType eTextLanguage, sal_Int32 nStart, sal_Int32& nLen ) { - sal_Int32 nextPos, nEnd = nStart + nLen; + int digitOffset = GetLocalizedDigitOffset(eTextLanguage); - for (sal_Int32 i = nStart; i < nEnd; i = nextPos) - { - nextPos = i; - sal_uInt32 nChar = sStr.iterateCodePoints(&nextPos); + // If we’re already using arabic digits then we can shortcut the function just return the + // original string + if (digitOffset == 0) + return sStr; + + sal_Int32 nEnd = nStart + nLen; - sal_UCS4 nReplacementChar = GetLocalizedChar(nChar, eTextLanguage); + for (sal_Int32 i = nStart; i < nEnd; ++i) + { + sal_Unicode nChar = sStr[i]; // The first time we encounter a character that needs to change we’ll make a copy of the // string so we can return a new modified one - if (nReplacementChar != nChar) + if (nChar >= '0' && nChar <= '9') { // The new string is very likely to have the same length as the old one OUStringBuffer xTmpStr(sStr.getLength()); xTmpStr.append(sStr.subView(0, i)); - xTmpStr.appendUtf32(nReplacementChar); // Convert the remainder of the range - for (i = nextPos; i < nEnd;) + for (; i < nEnd; ++i) { - nReplacementChar = GetLocalizedChar(sStr.iterateCodePoints(&i), eTextLanguage); - xTmpStr.appendUtf32(nReplacementChar); + nChar = sStr[i]; + if (nChar >= '0' && nChar <= '9') + xTmpStr.appendUtf32(nChar + digitOffset); + else + xTmpStr.append(nChar); } // Add the rest of the string outside of the range 
xTmpStr.append(sStr.subView(nEnd)); - // The length of the string might have changed if GetLocalizedChar converts between BMP - // and surrogate pairs + // The length of the string might have changed if the offset makes the character need + // surrogate pairs nLen += xTmpStr.getLength() - sStr.getLength(); return xTmpStr.makeStringAndClear();
