formula/source/core/api/FormulaCompiler.cxx | 19 +++++- sc/inc/compiler.hxx | 10 ++- sc/source/core/tool/compiler.cxx | 82 ++++++++++++++++++++-------- 3 files changed, 83 insertions(+), 28 deletions(-)
New commits: commit af75098d524311416a5f7caf6ae76055cc689ad1 Author: Eike Rathke <er...@redhat.com> AuthorDate: Mon Sep 28 21:02:23 2020 +0200 Commit: Muhammet Kara <muhammet.k...@collabora.com> CommitDate: Sun Oct 18 22:36:40 2020 +0200 Resolves: tdf#137091 Use CharClass matching the formula language This is a combination of 3 commits. Resolves: tdf#137091 Use CharClass matching the formula language ... not the current locale. Specifically important for uppercase/lowercase conversions that may yield different results for example in Turkish i with/without dot. I2aa57cdcf530d7a0697c4ffbd5dccb86bb526d8e Reviewed-on: https://gerrit.libreoffice.org/c/core/+/103588 Tested-by: Jenkins Reviewed-by: Eike Rathke <er...@redhat.com> (cherry picked from commit 3c6177be2705303044e3de262689d593f3d0f282) Signed-off-by: Xisco Fauli <xiscofa...@libreoffice.org> Current system locale's CharClass for user defined names, tdf#137091 follow-up I5f025a12ca183acb3f80d2a7527677aceb9ffbd5 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/103593 Reviewed-by: Eike Rathke <er...@redhat.com> Tested-by: Jenkins (cherry picked from commit d41c45a522c5e973d7043d36bc6c82e77735ab9b) Determine CharClass difference once, tdf#137091 follow-up As a side note: Clang plugin simplifybool for !(rLT1.getLanguage() == "en" && rLT2.getLanguage() == "en") told "error: logical negation of logical op containing negation, can be simplified" which is nonsense (the message stayed the same while the checks evolved). It actually complained about !(a==b && c==d) to be rewritten as (a!=b || c!=d) whether that makes sense or not.. it may save one boolean operation, yes, but.. 
Ib478d46d7ff926c1c9f65fec059c7a3f31fa7ce3 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/103601 Tested-by: Jenkins Reviewed-by: Eike Rathke <er...@redhat.com> (cherry picked from commit 1acf517906b7cdc4931dd26319d467dff53ae7d2) Conflicts: sc/source/core/tool/compiler.cxx Change-Id: I2aa57cdcf530d7a0697c4ffbd5dccb86bb526d8e Reviewed-on: https://gerrit.libreoffice.org/c/core/+/103598 Tested-by: Jenkins Reviewed-by: Xisco Fauli <xiscofa...@libreoffice.org> Reviewed-on: https://gerrit.libreoffice.org/c/core/+/104486 Tested-by: Jenkins CollaboraOffice <jenkinscollaboraoff...@gmail.com> Reviewed-by: Muhammet Kara <muhammet.k...@collabora.com> diff --git a/formula/source/core/api/FormulaCompiler.cxx b/formula/source/core/api/FormulaCompiler.cxx index 16e58f9c8e71..5de0b8d76491 100644 --- a/formula/source/core/api/FormulaCompiler.cxx +++ b/formula/source/core/api/FormulaCompiler.cxx @@ -31,6 +31,9 @@ #include <svl/zforlist.hxx> #include <unotools/resmgr.hxx> #include <unotools/charclass.hxx> +#include <vcl/svapp.hxx> +#include <vcl/settings.hxx> +#include <comphelper/processfactory.hxx> #include <com/sun/star/sheet/FormulaOpCodeMapEntry.hpp> #include <com/sun/star/sheet/FormulaMapGroup.hpp> #include <com/sun/star/sheet/FormulaMapGroupSpecialOffset.hpp> @@ -143,6 +146,14 @@ void lclPushOpCodeMapEntries( ::std::vector< sheet::FormulaOpCodeMapEntry >& rVe lclPushOpCodeMapEntry( rVec, pTable, *pnOpCodes ); } +CharClass* createCharClassIfNonEnglishUI() +{ + const LanguageTag& rLanguageTag( Application::GetSettings().GetUILanguageTag()); + if (rLanguageTag.getLanguage() == "en") + return nullptr; + return new CharClass( ::comphelper::getProcessComponentContext(), rLanguageTag); +} + class OpCodeList { public: @@ -166,8 +177,8 @@ OpCodeList::OpCodeList(bool bLocalized, const std::pair<const char*, int>* pSymb , mpSymbols(pSymbols) , mbLocalized(bLocalized) { - SvtSysLocale aSysLocale; - const CharClass* pCharClass = (xMap->isEnglish() ? 
nullptr : aSysLocale.GetCharClassPtr()); + std::unique_ptr<CharClass> xCharClass( xMap->isEnglish() ? nullptr : createCharClassIfNonEnglishUI()); + const CharClass* pCharClass = xCharClass.get(); if (meSepType == FormulaCompiler::SeparatorType::RESOURCE_BASE) { for (sal_uInt16 i = 0; i <= SC_OPCODE_LAST_OPCODE_ID; ++i) @@ -813,8 +824,8 @@ FormulaCompiler::OpCodeMapPtr FormulaCompiler::CreateOpCodeMap( NonConstOpCodeMapPtr xMap( new OpCodeMap( SC_OPCODE_LAST_OPCODE_ID + 1, false, FormulaGrammar::mergeToGrammar( FormulaGrammar::setEnglishBit( FormulaGrammar::GRAM_EXTERNAL, bEnglish), FormulaGrammar::CONV_UNSPECIFIED))); - SvtSysLocale aSysLocale; - const CharClass* pCharClass = (xMap->isEnglish() ? nullptr : aSysLocale.GetCharClassPtr()); + std::unique_ptr<CharClass> xCharClass( xMap->isEnglish() ? nullptr : createCharClassIfNonEnglishUI()); + const CharClass* pCharClass = xCharClass.get(); for (auto const& rMapEntry : rMapping) { OpCode eOp = OpCode(rMapEntry.Token.OpCode); diff --git a/sc/inc/compiler.hxx b/sc/inc/compiler.hxx index c19ff834ded9..5de80a6b9fb3 100644 --- a/sc/inc/compiler.hxx +++ b/sc/inc/compiler.hxx @@ -251,7 +251,8 @@ public: private: - static CharClass *pCharClassEnglish; // character classification for en_US locale + static const CharClass *pCharClassEnglish; // character classification for en_US locale + static const CharClass *pCharClassLocalized; // character classification for UI locale static const Convention *pConventions[ formula::FormulaGrammar::CONV_LAST ]; static const struct AddInMap @@ -282,7 +283,8 @@ private: std::queue<OpCode> maPendingOpCodes; // additional opcodes generated from a single symbol - const CharClass* pCharClass; // which character classification is used for parseAnyToken + const CharClass* pCharClass; // which character classification is used for parseAnyToken and upper/lower + bool mbCharClassesDiffer; // whether pCharClass and current system locale's CharClass differ sal_uInt16 mnPredetectedReference; // 
reference when reading ODF, 0 (none), 1 (single) or 2 (double) sal_Int32 mnRangeOpPosInSymbol; // if and where a range operator is in symbol const Convention *pConv; @@ -319,6 +321,7 @@ private: #endif bool NextNewToken(bool bInArray); + bool ToUpperAsciiOrI18nIsAscii( OUString& rUpper, const OUString& rOrg ) const; virtual void SetError(FormulaError nError) override; sal_Int32 NextSymbol(bool bInArray); @@ -349,7 +352,8 @@ private: */ ScRangeData* GetRangeData( const formula::FormulaToken& pToken ) const; - static void InitCharClassEnglish(); + static const CharClass* GetCharClassEnglish(); + static const CharClass* GetCharClassLocalized(); public: ScCompiler( sc::CompileFormulaContext& rCxt, const ScAddress& rPos, diff --git a/sc/source/core/tool/compiler.cxx b/sc/source/core/tool/compiler.cxx index d84efe3b1003..bb5efae527fd 100644 --- a/sc/source/core/tool/compiler.cxx +++ b/sc/source/core/tool/compiler.cxx @@ -22,6 +22,7 @@ #include <compiler.hxx> #include <vcl/svapp.hxx> +#include <vcl/settings.hxx> #include <sfx2/app.hxx> #include <sfx2/objsh.hxx> #include <basic/sbmeth.hxx> @@ -78,7 +79,8 @@ using namespace formula; using namespace ::com::sun::star; using ::std::vector; -CharClass* ScCompiler::pCharClassEnglish = nullptr; +const CharClass* ScCompiler::pCharClassEnglish = nullptr; +const CharClass* ScCompiler::pCharClassLocalized = nullptr; const ScCompiler::Convention* ScCompiler::pConventions[ ] = { nullptr, nullptr, nullptr, nullptr, nullptr, nullptr }; enum ScanState @@ -168,12 +170,17 @@ void ScCompiler::DeInit() delete pCharClassEnglish; pCharClassEnglish = nullptr; } + if (pCharClassLocalized) + { + delete pCharClassLocalized; + pCharClassLocalized = nullptr; + } } bool ScCompiler::IsEnglishSymbol( const OUString& rName ) { // function names are always case-insensitive - OUString aUpper = ScGlobal::pCharClass->uppercase(rName); + OUString aUpper = GetCharClassEnglish()->uppercase(rName); // 1. 
built-in function name OpCode eOp = ScCompiler::GetEnglishOpCode( aUpper ); @@ -192,11 +199,27 @@ bool ScCompiler::IsEnglishSymbol( const OUString& rName ) return !aIntName.isEmpty(); // no valid function name } -void ScCompiler::InitCharClassEnglish() +const CharClass* ScCompiler::GetCharClassEnglish() { - css::lang::Locale aLocale( "en", "US", ""); - pCharClassEnglish = new CharClass( - ::comphelper::getProcessComponentContext(), LanguageTag( aLocale)); + if (!pCharClassEnglish) + { + css::lang::Locale aLocale( "en", "US", ""); + pCharClassEnglish = new CharClass( + ::comphelper::getProcessComponentContext(), LanguageTag( aLocale)); + } + return pCharClassEnglish; +} + +const CharClass* ScCompiler::GetCharClassLocalized() +{ + if (!pCharClassLocalized) + { + // Switching UI language requires restart; if not, we would have to + // keep track of that. + pCharClassLocalized = new CharClass( + ::comphelper::getProcessComponentContext(), Application::GetSettings().GetUILanguageTag()); + } + return pCharClassLocalized; } void ScCompiler::SetGrammar( const FormulaGrammar::Grammar eGrammar ) @@ -261,13 +284,19 @@ void ScCompiler::SetFormulaLanguage( const ScCompiler::OpCodeMapPtr & xMap ) { mxSymbols = xMap; if (mxSymbols->isEnglish()) - { - if (!pCharClassEnglish) - InitCharClassEnglish(); - pCharClass = pCharClassEnglish; - } + pCharClass = GetCharClassEnglish(); else - pCharClass = ScGlobal::pCharClass; + pCharClass = GetCharClassLocalized(); + + // The difference is needed for an uppercase() call that usually does not + // result in different strings but for a few languages like Turkish; + // though even de-DE and de-CH may differ in ß/SS handling.. + // At least don't care if both are English. + // The current locale is more likely to not be "en" so check first. 
+ const LanguageTag& rLT1 = ScGlobal::GetpLocaleData()->getLanguageTag(); + const LanguageTag& rLT2 = pCharClass->getLanguageTag(); + mbCharClassesDiffer = (rLT1 != rLT2 && (rLT1.getLanguage() != "en" || rLT2.getLanguage() != "en")); + SetGrammarAndRefConvention( mxSymbols->getGrammar(), GetGrammar()); } } @@ -1789,6 +1818,7 @@ ScCompiler::ScCompiler( sc::CompileFormulaContext& rCxt, const ScAddress& rPos, mnCurrentSheetTab(-1), mnCurrentSheetEndPos(0), pCharClass(ScGlobal::pCharClass), + mbCharClassesDiffer(false), mnPredetectedReference(0), mnRangeOpPosInSymbol(-1), pConv(GetRefConvention(FormulaGrammar::CONV_OOO)), @@ -1812,6 +1842,7 @@ ScCompiler::ScCompiler( ScDocument* pDocument, const ScAddress& rPos, ScTokenArr mnCurrentSheetEndPos(0), nSrcPos(0), pCharClass( ScGlobal::pCharClass ), + mbCharClassesDiffer(false), mnPredetectedReference(0), mnRangeOpPosInSymbol(-1), pConv( GetRefConvention( FormulaGrammar::CONV_OOO ) ), @@ -1834,6 +1865,7 @@ ScCompiler::ScCompiler( sc::CompileFormulaContext& rCxt, const ScAddress& rPos, mnCurrentSheetTab(-1), mnCurrentSheetEndPos(0), pCharClass(ScGlobal::pCharClass), + mbCharClassesDiffer(false), mnPredetectedReference(0), mnRangeOpPosInSymbol(-1), pConv(GetRefConvention(FormulaGrammar::CONV_OOO)), @@ -1857,6 +1889,7 @@ ScCompiler::ScCompiler( ScDocument* pDocument, const ScAddress& rPos, mnCurrentSheetEndPos(0), nSrcPos(0), pCharClass( ScGlobal::pCharClass ), + mbCharClassesDiffer(false), mnPredetectedReference(0), mnRangeOpPosInSymbol(-1), pConv( GetRefConvention( FormulaGrammar::CONV_OOO ) ), @@ -4157,9 +4190,9 @@ void ScCompiler::AutoCorrectParsedSymbol() } } -static bool lcl_UpperAsciiOrI18n( OUString& rUpper, const OUString& rOrg, FormulaGrammar::Grammar eGrammar ) +bool ScCompiler::ToUpperAsciiOrI18nIsAscii( OUString& rUpper, const OUString& rOrg ) const { - if (FormulaGrammar::isODFF( eGrammar )) + if (FormulaGrammar::isODFF( meGrammar )) { // ODFF has a defined set of English function names, avoid i18n // overhead. 
@@ -4168,7 +4201,8 @@ static bool lcl_UpperAsciiOrI18n( OUString& rUpper, const OUString& rOrg, Formul } else { - rUpper = ScGlobal::pCharClass->uppercase(rOrg); + // One of localized or English. + rUpper = pCharClass->uppercase(rOrg); return false; } } @@ -4262,7 +4296,7 @@ bool ScCompiler::NextNewToken( bool bInArray ) else { OUString aTmpStr( cSymbol[0] ); - bMayBeFuncName = ScGlobal::pCharClass->isLetter( aTmpStr, 0 ); + bMayBeFuncName = pCharClass->isLetter( aTmpStr, 0 ); bAsciiNonAlnum = false; } @@ -4313,7 +4347,7 @@ bool ScCompiler::NextNewToken( bool bInArray ) if (bAsciiNonAlnum) { - bAsciiUpper = lcl_UpperAsciiOrI18n( aUpper, aOrg, meGrammar); + bAsciiUpper = ToUpperAsciiOrI18nIsAscii( aUpper, aOrg); if (cSymbol[0] == '#') { // Check for TableRef item specifiers first. @@ -4339,7 +4373,7 @@ bool ScCompiler::NextNewToken( bool bInArray ) if (bMayBeFuncName) { if (aUpper.isEmpty()) - bAsciiUpper = lcl_UpperAsciiOrI18n( aUpper, aOrg, meGrammar); + bAsciiUpper = ToUpperAsciiOrI18nIsAscii( aUpper, aOrg); if (IsOpCode( aUpper, bInArray )) return true; } @@ -4363,7 +4397,7 @@ bool ScCompiler::NextNewToken( bool bInArray ) } if (aUpper.isEmpty()) - bAsciiUpper = lcl_UpperAsciiOrI18n( aUpper, aOrg, meGrammar); + bAsciiUpper = ToUpperAsciiOrI18nIsAscii( aUpper, aOrg); // IsBoolean() before IsValue() to catch inline bools without the kludge // for inline arrays. @@ -4374,8 +4408,14 @@ bool ScCompiler::NextNewToken( bool bInArray ) return true; // User defined names and such do need i18n upper also in ODF. - if (bAsciiUpper) + if (bAsciiUpper || mbCharClassesDiffer) + { + // Use current system locale here because user defined symbols are + // more likely in that localized language than in the formula + // language. This in corner cases needs to continue to work for + // existing documents and environments. 
aUpper = ScGlobal::pCharClass->uppercase( aOrg ); + } if (IsNamedRange( aUpper )) return true; @@ -4433,7 +4473,7 @@ bool ScCompiler::NextNewToken( bool bInArray ) // Provide single token information and continue. Do not set an error, that // would prematurely end compilation. Simple unknown names are handled by // the interpreter. - aUpper = ScGlobal::pCharClass->lowercase( aUpper ); + aUpper = pCharClass->lowercase( aUpper ); svl::SharedString aSS = pDoc->GetSharedStringPool().intern(aUpper); maRawToken.SetString(aSS.getData(), aSS.getDataIgnoreCase()); maRawToken.NewOpCode( ocBad ); _______________________________________________ Libreoffice-commits mailing list libreoffice-comm...@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/libreoffice-commits