i18nutil/source/utility/unicode.cxx |   32 ++++++++++++++++++++++++++++++++
 include/i18nutil/unicode.hxx        |    9 +++++++++
 2 files changed, 41 insertions(+)

New commits:
commit ecc30ac20f559e47fc4a183942d73913d615ff3f
Author:     Eike Rathke <er...@redhat.com>
AuthorDate: Wed Oct 5 01:29:02 2022 +0200
Commit:     Eike Rathke <er...@redhat.com>
CommitDate: Wed Oct 5 12:05:01 2022 +0200

    Introduce unicode::getScriptClassFromLanguageTag()
    
    Change-Id: Ifb932ff2aabc5767571433627314f0b29c35c471
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/140953
    Reviewed-by: Eike Rathke <er...@redhat.com>
    Tested-by: Jenkins

diff --git a/i18nutil/source/utility/unicode.cxx 
b/i18nutil/source/utility/unicode.cxx
index 5e479989eae9..33f1ca2f190e 100644
--- a/i18nutil/source/utility/unicode.cxx
+++ b/i18nutil/source/utility/unicode.cxx
@@ -24,6 +24,7 @@
 #include <i18nutil/unicode.hxx>
 #include <sal/log.hxx>
 #include <unicode/numfmt.h>
+#include <unicode/uchar.h>
 #include "unicode_data.h"
 #include <rtl/character.hxx>
 #include <o3tl/string_view.hxx>
@@ -190,6 +191,37 @@ sal_Int16 
unicode::getScriptClassFromUScriptCode(UScriptCode eScript)
     return nRet;
 }
 
+sal_Int16 unicode::getScriptClassFromLanguageTag( const LanguageTag& 
rLanguageTag )
+{   // Classify rLanguageTag as a css::i18n::ScriptType value via an ICU script code.
+    static UScriptCode nMaxScript = 
static_cast<UScriptCode>(u_getIntPropertyMaxValue(UCHAR_SCRIPT)); // queried from ICU once
+    constexpr int32_t nBuf = 42;    // capacity of aBuf as handed to uscript_getCode()
+    UScriptCode aBuf[nBuf];         // only aBuf[0] is evaluated below
+    if (rLanguageTag.hasScript())
+    {   // Explicit script subtag: look its name up as a UCHAR_SCRIPT property value.
+        aBuf[0] = static_cast<UScriptCode>(u_getPropertyValueEnum( 
UCHAR_SCRIPT,
+                OUStringToOString( rLanguageTag.getScript(), 
RTL_TEXTENCODING_ASCII_US).getStr()));
+    }   // NOTE(review): ICU documents UCHAR_INVALID_CODE for unknown names; it is passed on as is - confirm intended.
+    else
+    {   // No script subtag: ask ICU for the script(s) of "language" or "language-country".
+        OUString aName;
+        if (rLanguageTag.getCountry().isEmpty())
+            aName = rLanguageTag.getLanguage();
+        else
+            aName = rLanguageTag.getLanguage() + "-" + 
rLanguageTag.getCountry();
+        UErrorCode status = U_ZERO_ERROR;
+        const int32_t nScripts = uscript_getCode(
+                OUStringToOString( aName, RTL_TEXTENCODING_ASCII_US).getStr(),
+                aBuf, nBuf, &status);
+        // U_BUFFER_OVERFLOW_ERROR would be set with too many scripts for 
buffer
+        // and the required capacity returned; that error also ends up as LATIN below.
+        if (nScripts == 0 || !U_SUCCESS(status))
+            return css::i18n::ScriptType::LATIN;    // nothing usable from ICU: fall back to LATIN
+    }
+    if (aBuf[0] > nMaxScript)   // script code above ICU's maximum UCHAR_SCRIPT value
+        return css::i18n::ScriptType::COMPLEX;
+    return getScriptClassFromUScriptCode( aBuf[0]);     // otherwise classify the first script
+}
+
 OString unicode::getExemplarLanguageForUScriptCode(UScriptCode eScript)
 {
     OString sRet;
diff --git a/include/i18nutil/unicode.hxx b/include/i18nutil/unicode.hxx
index 0ca14290981e..ebe50ce90384 100644
--- a/include/i18nutil/unicode.hxx
+++ b/include/i18nutil/unicode.hxx
@@ -82,6 +82,15 @@ public:
     //Format a number as a percentage according to the rules of the given
     //language, e.g. 100 -> "100%" for en-US vs "100 %" for de-DE
     static OUString formatPercent(double dNumber, const LanguageTag& rLangTag);
+
+    /** Map a LanguageTag to Latin/Asian/Complex/Weak. If the tag has an ISO
+        15924 script code that script is mapped, otherwise the first (default)
+        script that ICU reports for the ISO 639 language(-country). If no
+        script can be obtained for the language(-country), LATIN is returned.
+
+        @return a css::i18n::ScriptType value.
+     */
+    static sal_Int16 getScriptClassFromLanguageTag(const LanguageTag& 
rLanguageTag);
 };
 
 /*

Reply via email to