i18npool/source/characterclassification/cclass_unicode_parser.cxx | 18 ++++++++-- offapi/com/sun/star/i18n/KParseTokens.idl | 7 +++ sc/source/core/tool/compiler.cxx | 6 +-- sw/source/core/bastyp/calc.cxx | 2 - 4 files changed, 26 insertions(+), 7 deletions(-)
New commits: commit 0644deaa8a441e2ad9c41ad4b4a528b1579e9cd9 Author: Michael Stahl <michael.st...@allotropia.de> AuthorDate: Wed Jan 29 19:54:14 2025 +0100 Commit: Xisco Fauli <xiscofa...@libreoffice.org> CommitDate: Tue Feb 4 16:43:42 2025 +0100 tdf#158867 offapi,i18npool,starmath: fix "," in Math again This is essentially the same bug that was fixed in commit 2caa9d7ddcff3f6e380c306b737e9b5e9cdaf4c2 "Resolves: tdf#127873 accept ',' comma group separator in number entities again". The problem is that sw wants to recognize group separators only if there are 3 digits following so it really looks like a number (see bug tdf#42518), but starmath really wants to recognize either comma or dot as a decimal separator and uses the GROUP_SEPARATOR_IN_NUMBER as a hack to get this behaviour, so the same parser can work in any locale. Introduce new constant GROUP_SEPARATOR_IN_NUMBER_3 so that sw and starmath can request different parsing. sc/source/core/tool/compiler.cxx also uses GROUP_SEPARATOR_IN_NUMBER and it's unclear to me what is desired there, but there are no known regression reports so leave the behaviour there unchanged. 
(regression from commit 1bd9a51b826015746069fcc0d02a30a2ddc7e7f5) Change-Id: Icb7662f26d89677cf84ccad07a2ea4c380587ab4 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/180920 Tested-by: Jenkins Reviewed-by: Michael Stahl <michael.st...@allotropia.de> (cherry picked from commit ff16c4e3f27efc0fc9ed734b19ae778482566cdb) Reviewed-on: https://gerrit.libreoffice.org/c/core/+/181118 Reviewed-by: Thorsten Behrens <thorsten.behr...@allotropia.de> diff --git a/i18npool/source/characterclassification/cclass_unicode_parser.cxx b/i18npool/source/characterclassification/cclass_unicode_parser.cxx index 9ed95e2f907a..97fae9518adc 100644 --- a/i18npool/source/characterclassification/cclass_unicode_parser.cxx +++ b/i18npool/source/characterclassification/cclass_unicode_parser.cxx @@ -23,6 +23,7 @@ #include <rtl/character.hxx> #include <rtl/math.hxx> #include <rtl/ustring.hxx> +#include <sal/log.hxx> #include <com/sun/star/i18n/KParseTokens.hpp> #include <com/sun/star/i18n/KParseType.hpp> #include <com/sun/star/i18n/LocaleData2.hpp> @@ -441,7 +442,11 @@ void cclass_Unicode::initParserTable( const Locale& rLocale, sal_Int32 startChar cDecimalSepAlt = aItem.decimalSeparatorAlternative.toChar(); } - if (nContTypes & KParseTokens::GROUP_SEPARATOR_IN_NUMBER) + SAL_WARN_IF((nContTypes & KParseTokens::GROUP_SEPARATOR_IN_NUMBER) + && (nContTypes & KParseTokens::GROUP_SEPARATOR_IN_NUMBER_3), + "i18npool", "only one GROUP_SEPARATOR_IN_NUMBER* should be used"); + if (nContTypes & KParseTokens::GROUP_SEPARATOR_IN_NUMBER + || nContTypes & KParseTokens::GROUP_SEPARATOR_IN_NUMBER_3) { if ( cGroupSep < nDefCnt ) pTable[cGroupSep] |= ParserFlags::VALUE; @@ -827,13 +832,20 @@ void cclass_Unicode::parseText( ParseResult& r, const OUString& rText, sal_Int32 { if (current == cGroupSep) { - // accept only if it is followed by 3 digits + // depending on requested nContTypes, accept only if + // it is followed by 3 digits sal_Int32 tempIndex(index); sal_uInt32 const nextChar2((tempIndex < 
rText.getLength()) ? rText.iterateCodePoints(&tempIndex) : 0); sal_uInt32 const nextChar3((tempIndex < rText.getLength()) ? rText.iterateCodePoints(&tempIndex) : 0); - if (getFlags(nextChar, eState) & ParserFlags::VALUE_DIGIT + if ((nContTypes & KParseTokens::GROUP_SEPARATOR_IN_NUMBER_3) + && getFlags(nextChar, eState) & ParserFlags::VALUE_DIGIT && getFlags(nextChar2, eState) & ParserFlags::VALUE_DIGIT && getFlags(nextChar3, eState) & ParserFlags::VALUE_DIGIT) + { + nParseTokensType |= KParseTokens::GROUP_SEPARATOR_IN_NUMBER_3; + } + else if ((nContTypes & KParseTokens::GROUP_SEPARATOR_IN_NUMBER_3) == 0 + && getFlags(nextChar, eState) & ParserFlags::VALUE_DIGIT) { nParseTokensType |= KParseTokens::GROUP_SEPARATOR_IN_NUMBER; } diff --git a/offapi/com/sun/star/i18n/KParseTokens.idl b/offapi/com/sun/star/i18n/KParseTokens.idl index f06b9861d6df..bb04c251d82c 100644 --- a/offapi/com/sun/star/i18n/KParseTokens.idl +++ b/offapi/com/sun/star/i18n/KParseTokens.idl @@ -93,6 +93,13 @@ published constants KParseTokens /// Unicode (above 127) other number const long UNI_OTHER_NUMBER = 0x00080000; + /** The same as GROUP_SEPARATOR_IN_NUMBER, but require <em>3</em> + digits following the group separator. + + @since LibreOffice 25.2 + */ + const long GROUP_SEPARATOR_IN_NUMBER_3 = 0x04000000; + /** If this bit is set in <em>nContCharFlags</em> parameters, the locale's group separator characters in numbers are accepted and ignored/skipped. Else a group separator in a number ends the diff --git a/sc/source/core/tool/compiler.cxx b/sc/source/core/tool/compiler.cxx index 26684473bc44..6d7cd6fb6599 100644 --- a/sc/source/core/tool/compiler.cxx +++ b/sc/source/core/tool/compiler.cxx @@ -786,7 +786,7 @@ struct Convention_A1 : public ScCompiler::Convention static constexpr OUString aAddAllowed(u"?#"_ustr); return pCharClass->parseAnyToken( rFormula, nSrcPos, nStartFlags, aAddAllowed, - (bGroupSeparator ? 
nContFlags | KParseTokens::GROUP_SEPARATOR_IN_NUMBER : nContFlags), + (bGroupSeparator ? nContFlags | KParseTokens::GROUP_SEPARATOR_IN_NUMBER_3 : nContFlags), aAddAllowed ); } @@ -1411,7 +1411,7 @@ struct ConventionXL_A1 : public Convention_A1, public ConventionXL static constexpr OUString aAddAllowed(u"?!"_ustr); return pCharClass->parseAnyToken( rFormula, nSrcPos, nStartFlags, aAddAllowed, - (bGroupSeparator ? nContFlags | KParseTokens::GROUP_SEPARATOR_IN_NUMBER : nContFlags), + (bGroupSeparator ? nContFlags | KParseTokens::GROUP_SEPARATOR_IN_NUMBER_3 : nContFlags), aAddAllowed ); } @@ -1762,7 +1762,7 @@ struct ConventionXL_R1C1 : public ScCompiler::Convention, public ConventionXL return pCharClass->parseAnyToken( rFormula, nSrcPos, nStartFlags, aAddAllowed, - (bGroupSeparator ? nContFlags | KParseTokens::GROUP_SEPARATOR_IN_NUMBER : nContFlags), + (bGroupSeparator ? nContFlags | KParseTokens::GROUP_SEPARATOR_IN_NUMBER_3 : nContFlags), aAddAllowed ); } diff --git a/sw/source/core/bastyp/calc.cxx b/sw/source/core/bastyp/calc.cxx index fbfc4b701457..ebc24d4f6e0d 100644 --- a/sw/source/core/bastyp/calc.cxx +++ b/sw/source/core/bastyp/calc.cxx @@ -115,7 +115,7 @@ const sal_Int32 coStartFlags = // Continuing characters may be any alphanumeric, underscore, or dot. const sal_Int32 coContFlags = - (coStartFlags | i18n::KParseTokens::ASC_DOT | i18n::KParseTokens::GROUP_SEPARATOR_IN_NUMBER) + (coStartFlags | i18n::KParseTokens::ASC_DOT | i18n::KParseTokens::GROUP_SEPARATOR_IN_NUMBER_3) & ~i18n::KParseTokens::IGNORE_LEADING_WS; extern "C" {