i18npool/qa/cppunit/test_breakiterator.cxx           |   18 ++++++++++++++
 lingucomponent/source/spellcheck/spell/sspellimp.cxx |   23 +++++++++++++++----
 sw/qa/extras/layout/layout2.cxx                      |   20 ++++++++++++++++
 3 files changed, 56 insertions(+), 5 deletions(-)

New commits:
commit 3fdf13e7f9e0e053d11bfe477407ec9331160ef2
Author:     László Németh <[email protected]>
AuthorDate: Sat Dec 27 13:40:51 2025 +0100
Commit:     Adolfo Jayme Barrientos <[email protected]>
CommitDate: Tue Dec 30 22:36:36 2025 +0100

    tdf#170140 lingucomponent: check words with non-ASCII apostrophe
    
    If the spelling dictionary contains them with non-ASCII apostrophe.
    
    Previously the words were checked only with apostrophes converted
    to their ASCII version, resulting in continuous false alarms, despite
    the correct orthography in the document and in the spelling
    dictionary.
    
    Now the words are checked both with ASCII conversion and with
    the original non-ASCII apostrophes (because there are still
    UTF-8 dictionaries with ASCII apostrophes and, moreover, dictionaries
    containing mixed apostrophes in the same .dic file).
    
    Example words for the Hungarian dictionary: d’Arc, d’Alembert,
    McDonald’s (and their several recognized suffixed forms: d’Arcért,
    d’Arckal, d’Arcként, d’Arcnak, d’Arcot etc. etc.)
    
    Add unit tests for 1) break iterator and 2) spell checking with
    non-ASCII apostrophes.
    
    Change-Id: I24a570df41fa5aba2e7b67dde0db33377717dc2a
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/196248
    Reviewed-by: László Németh <[email protected]>
    Tested-by: Jenkins
    Signed-off-by: Xisco Fauli <[email protected]>
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/196283
    (cherry picked from commit 10d08d2ffead7597a44e0ce0cf4c5e98ffc29b22)
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/196309
    Reviewed-by: Adolfo Jayme Barrientos <[email protected]>

diff --git a/i18npool/qa/cppunit/test_breakiterator.cxx 
b/i18npool/qa/cppunit/test_breakiterator.cxx
index ae2e5af4e5e9..c91532d5ff3a 100644
--- a/i18npool/qa/cppunit/test_breakiterator.cxx
+++ b/i18npool/qa/cppunit/test_breakiterator.cxx
@@ -1844,7 +1844,18 @@ void TestBreakIterator::testDictWordAbbreviationHU()
 
     for (const auto& rLocale : aLocale)
     {
-        auto aTest = u"Pl. stb. dr.-ral Mo.-gal 50-et 50.-et"_ustr;
+
+        auto aTest =
+                // abbreviations
+                u"Pl. stb. "
+                // abbreviations with suffixes
+                "dr.-ral Mo.-gal "
+                // number with a suffix
+                "50-et "
+                // ordinal number with a suffix
+                "50.-et "
+                // word with a non-ASCII apostrophe
+                "d’Arc"_ustr;
 
         i18n::Boundary aBounds
             = m_xBreak->getWordBoundary(aTest, 1, rLocale, 
i18n::WordType::DICTIONARY_WORD, false);
@@ -1875,6 +1886,11 @@ void TestBreakIterator::testDictWordAbbreviationHU()
             = m_xBreak->getWordBoundary(aTest, 31, rLocale, 
i18n::WordType::DICTIONARY_WORD, false);
         CPPUNIT_ASSERT_EQUAL(sal_Int32(31), aBounds.startPos);
         CPPUNIT_ASSERT_EQUAL(sal_Int32(37), aBounds.endPos);
+
+        aBounds
+            = m_xBreak->getWordBoundary(aTest, 38, rLocale, 
i18n::WordType::DICTIONARY_WORD, false);
+        CPPUNIT_ASSERT_EQUAL(sal_Int32(38), aBounds.startPos);
+        CPPUNIT_ASSERT_EQUAL(sal_Int32(43), aBounds.endPos);
     }
 }
 
diff --git a/lingucomponent/source/spellcheck/spell/sspellimp.cxx 
b/lingucomponent/source/spellcheck/spell/sspellimp.cxx
index fe676cde5312..153ce18ec2be 100644
--- a/lingucomponent/source/spellcheck/spell/sspellimp.cxx
+++ b/lingucomponent/source/spellcheck/spell/sspellimp.cxx
@@ -243,6 +243,7 @@ sal_Bool SAL_CALL SpellChecker::hasLocale(const Locale& 
rLocale)
     return bRes;
 }
 
+#define SPELL_NON_ASCII_APOSTROPHE 1 << 10
 sal_Int16 SpellChecker::GetSpellFailure(const OUString &rWord, const Locale 
&rLocale, int& rInfo)
 {
     if (rWord.getLength() > MAXWORDLEN)
@@ -261,15 +262,19 @@ sal_Int16 SpellChecker::GetSpellFailure(const OUString 
&rWord, const Locale &rLo
     sal_Int32 n = rBuf.getLength();
     sal_Unicode c;
     sal_Int32 extrachar = 0;
-
+    const bool bDoNotConvertApostrophe = bool(rInfo & 
SPELL_NON_ASCII_APOSTROPHE);
+    bool bHasNonASCIIApostrophe = false;
+    rInfo = 0;
     for (sal_Int32 ix=0; ix < n; ix++)
     {
         c = rBuf[ix];
         if ((c == 0x201C) || (c == 0x201D))
             rBuf[ix] = u'"';
-        else if ((c == 0x2018) || (c == 0x2019))
+        else if (!bDoNotConvertApostrophe && ((c == 0x2018) || (c == 0x2019)))
+        {
             rBuf[ix] = u'\'';
-
+            bHasNonASCIIApostrophe = true;
+        }
         // recognize words with Unicode ligatures and ZWNJ/ZWJ characters (only
         // with 8-bit encoded dictionaries. For UTF-8 encoded dictionaries
         // set ICONV and IGNORE aff file options, if needed.)
@@ -370,6 +375,10 @@ sal_Int16 SpellChecker::GetSpellFailure(const OUString 
&rWord, const Locale &rLo
         }
     }
 
+    // checked with apostrophe conversion
+    if ( bHasNonASCIIApostrophe )
+        rInfo |= SPELL_NON_ASCII_APOSTROPHE;
+
     return nRes;
 }
 
@@ -396,8 +405,14 @@ sal_Bool SAL_CALL SpellChecker::isValid( const OUString& 
rWord, const Locale& rL
     PropertyHelper_Spelling& rHelper = GetPropHelper();
     rHelper.SetTmpPropVals( rProperties );
 
-    int nInfo = 0;
+    int nInfo = 0; // return compound information, disable apostrophe 
conversion
     sal_Int16 nFailure = GetSpellFailure( rWord, rLocale, nInfo );
+    // it contains non-ASCII apostrophe, and it was bad with ASCII conversion:
+    // check the word with the original apostrophe character(s), too
+    if ( nFailure != -1 && nInfo & SPELL_NON_ASCII_APOSTROPHE ) {
+        nInfo = SPELL_NON_ASCII_APOSTROPHE; // disable apostrophe conversion
+        nFailure = GetSpellFailure( rWord, rLocale, nInfo );
+    }
     if (nFailure != -1 && !rWord.match(SPELL_XML, 0))
     {
         LanguageType nLang = LinguLocaleToLanguage( rLocale );
diff --git a/sw/qa/extras/layout/layout2.cxx b/sw/qa/extras/layout/layout2.cxx
index a5ba9d7f9399..9db240eb85c3 100644
--- a/sw/qa/extras/layout/layout2.cxx
+++ b/sw/qa/extras/layout/layout2.cxx
@@ -1275,6 +1275,26 @@ CPPUNIT_TEST_FIXTURE(SwLayoutWriter2, 
testTdf158885_not_compound_remain)
                 u"lenes emberellenes emberellenes emberellenes emberellenes 
emberellenes ");
 }
 
+// TODO: move this test to the lingucomponent project
+CPPUNIT_TEST_FIXTURE(SwLayoutWriter2, testTdf170140)
+{
+    uno::Reference<linguistic2::XSpellChecker1> xSpell = 
LinguMgr::GetSpellChecker();
+    auto aLocale = lang::Locale(u"hu"_ustr, u"HU"_ustr, OUString());
+    LanguageType eLang = LanguageTag::convertToLanguageType(aLocale);
+    if (!xSpell.is() || !xSpell->hasLanguage(static_cast<sal_uInt16>(eLang)))
+        return;
+
+    uno::Sequence<beans::PropertyValue> aProperties;
+
+    // correct non-ASCII apostrophe
+    OUString sWord(u"d’Arc"_ustr);
+    CPPUNIT_ASSERT(xSpell->isValid(sWord, static_cast<sal_uInt16>(eLang), 
aProperties));
+
+    // bad ASCII apostrophe
+    OUString sWord2(u"d'Arc"_ustr);
+    CPPUNIT_ASSERT(!xSpell->isValid(sWord2, static_cast<sal_uInt16>(eLang), 
aProperties));
+}
+
 CPPUNIT_TEST_FIXTURE(SwLayoutWriter2, testRedlineNumberInFootnote)
 {
     createSwDoc("tdf85610.fodt");

Reply via email to