i18npool/qa/cppunit/test_breakiterator.cxx           |   18 ++++++++++++++
 lingucomponent/source/spellcheck/spell/sspellimp.cxx |   23 +++++++++++++++----
 sw/qa/extras/layout/layout2.cxx                      |   20 ++++++++++++++++
 3 files changed, 56 insertions(+), 5 deletions(-)

New commits:
commit c1702ddc2e4fc3cbcdb2dbdc848d8a95e8ec9a52
Author:     László Németh <[email protected]>
AuthorDate: Sat Dec 27 13:40:51 2025 +0100
Commit:     László Németh <[email protected]>
CommitDate: Sun Dec 28 06:17:07 2025 +0100

    tdf#170140 lingucomponent: check words with non-ASCII apostrophe
    
    If the spelling dictionary contains them with non-ASCII apostrophe.
    
    Previously the words were checked only with apostrophes converted
    to their ASCII version, resulting in continuous false alarms, despite
    the correct orthography in the document and in the spelling
    dictionary.
    
    Now the words are checked both with ASCII conversion and with
    the original non-ASCII apostrophes (because there are still
    UTF-8 dictionaries with ASCII apostrophes, moreover, dictionaries
    containing mixed apostrophes in the same dic file).
    
    Example words for the Hungarian dictionary: d’Arc, d’Alembert,
    McDonald’s (and their several recognized suffixed forms: d’Arcért,
    d’Arckal, d’Arcként, d’Arcnak, d’Arcot etc. etc.)
    
    Add unit tests for 1) break iterator and 2) spell checking with
    non-ASCII apostrophes.
    
    Change-Id: I24a570df41fa5aba2e7b67dde0db33377717dc2a
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/196248
    Reviewed-by: László Németh <[email protected]>
    Tested-by: Jenkins

diff --git a/i18npool/qa/cppunit/test_breakiterator.cxx 
b/i18npool/qa/cppunit/test_breakiterator.cxx
index ae2e5af4e5e9..c91532d5ff3a 100644
--- a/i18npool/qa/cppunit/test_breakiterator.cxx
+++ b/i18npool/qa/cppunit/test_breakiterator.cxx
@@ -1844,7 +1844,18 @@ void TestBreakIterator::testDictWordAbbreviationHU()
 
     for (const auto& rLocale : aLocale)
     {
-        auto aTest = u"Pl. stb. dr.-ral Mo.-gal 50-et 50.-et"_ustr;
+
+        auto aTest =
+                // abbreviations
+                u"Pl. stb. "
+                // abbreviations with suffixes
+                "dr.-ral Mo.-gal "
+                // number with a suffix
+                "50-et "
+                // ordinal number with a suffix
+                "50.-et "
+                // word with a non-ASCII apostrophe
+                "d’Arc"_ustr;
 
         i18n::Boundary aBounds
             = m_xBreak->getWordBoundary(aTest, 1, rLocale, 
i18n::WordType::DICTIONARY_WORD, false);
@@ -1875,6 +1886,11 @@ void TestBreakIterator::testDictWordAbbreviationHU()
             = m_xBreak->getWordBoundary(aTest, 31, rLocale, 
i18n::WordType::DICTIONARY_WORD, false);
         CPPUNIT_ASSERT_EQUAL(sal_Int32(31), aBounds.startPos);
         CPPUNIT_ASSERT_EQUAL(sal_Int32(37), aBounds.endPos);
+
+        aBounds
+            = m_xBreak->getWordBoundary(aTest, 38, rLocale, 
i18n::WordType::DICTIONARY_WORD, false);
+        CPPUNIT_ASSERT_EQUAL(sal_Int32(38), aBounds.startPos);
+        CPPUNIT_ASSERT_EQUAL(sal_Int32(43), aBounds.endPos);
     }
 }
 
diff --git a/lingucomponent/source/spellcheck/spell/sspellimp.cxx 
b/lingucomponent/source/spellcheck/spell/sspellimp.cxx
index fe676cde5312..153ce18ec2be 100644
--- a/lingucomponent/source/spellcheck/spell/sspellimp.cxx
+++ b/lingucomponent/source/spellcheck/spell/sspellimp.cxx
@@ -243,6 +243,7 @@ sal_Bool SAL_CALL SpellChecker::hasLocale(const Locale& 
rLocale)
     return bRes;
 }
 
+#define SPELL_NON_ASCII_APOSTROPHE (1 << 10) // rInfo bit: in = keep non-ASCII apostrophes, out = apostrophes were converted to ASCII
 sal_Int16 SpellChecker::GetSpellFailure(const OUString &rWord, const Locale 
&rLocale, int& rInfo)
 {
     if (rWord.getLength() > MAXWORDLEN)
@@ -261,15 +262,19 @@ sal_Int16 SpellChecker::GetSpellFailure(const OUString 
&rWord, const Locale &rLo
     sal_Int32 n = rBuf.getLength();
     sal_Unicode c;
     sal_Int32 extrachar = 0;
-
+    const bool bDoNotConvertApostrophe = bool(rInfo & 
SPELL_NON_ASCII_APOSTROPHE);
+    bool bHasNonASCIIApostrophe = false;
+    rInfo = 0;
     for (sal_Int32 ix=0; ix < n; ix++)
     {
         c = rBuf[ix];
         if ((c == 0x201C) || (c == 0x201D))
             rBuf[ix] = u'"';
-        else if ((c == 0x2018) || (c == 0x2019))
+        else if (!bDoNotConvertApostrophe && ((c == 0x2018) || (c == 0x2019)))
+        {
             rBuf[ix] = u'\'';
-
+            bHasNonASCIIApostrophe = true;
+        }
         // recognize words with Unicode ligatures and ZWNJ/ZWJ characters (only
         // with 8-bit encoded dictionaries. For UTF-8 encoded dictionaries
         // set ICONV and IGNORE aff file options, if needed.)
@@ -370,6 +375,10 @@ sal_Int16 SpellChecker::GetSpellFailure(const OUString 
&rWord, const Locale &rLo
         }
     }
 
+    // checked with apostrophe conversion
+    if ( bHasNonASCIIApostrophe )
+        rInfo |= SPELL_NON_ASCII_APOSTROPHE;
+
     return nRes;
 }
 
@@ -396,8 +405,14 @@ sal_Bool SAL_CALL SpellChecker::isValid( const OUString& 
rWord, const Locale& rL
     PropertyHelper_Spelling& rHelper = GetPropHelper();
     rHelper.SetTmpPropVals( rProperties );
 
-    int nInfo = 0;
+    int nInfo = 0; // returns compound information; 0 keeps apostrophe 
conversion enabled
     sal_Int16 nFailure = GetSpellFailure( rWord, rLocale, nInfo );
+    // it contains non-ASCII apostrophe, and it was bad with ASCII conversion:
+    // check the word with the original apostrophe character(s), too
+    if ( nFailure != -1 && nInfo & SPELL_NON_ASCII_APOSTROPHE ) {
+        nInfo = SPELL_NON_ASCII_APOSTROPHE; // disable apostrophe conversion
+        nFailure = GetSpellFailure( rWord, rLocale, nInfo );
+    }
     if (nFailure != -1 && !rWord.match(SPELL_XML, 0))
     {
         LanguageType nLang = LinguLocaleToLanguage( rLocale );
diff --git a/sw/qa/extras/layout/layout2.cxx b/sw/qa/extras/layout/layout2.cxx
index f332a5226806..b288a036359d 100644
--- a/sw/qa/extras/layout/layout2.cxx
+++ b/sw/qa/extras/layout/layout2.cxx
@@ -1275,6 +1275,26 @@ CPPUNIT_TEST_FIXTURE(SwLayoutWriter2, 
testTdf158885_not_compound_remain)
                 u"lenes emberellenes emberellenes emberellenes emberellenes 
emberellenes ");
 }
 
+// tdf#170140: spell check a word with a non-ASCII apostrophe. TODO: move this test to the lingucomponent project
+CPPUNIT_TEST_FIXTURE(SwLayoutWriter2, testTdf170140)
+{
+    uno::Reference<linguistic2::XSpellChecker1> xSpell = 
LinguMgr::GetSpellChecker();
+    auto aLocale = lang::Locale(u"hu"_ustr, u"HU"_ustr, OUString());
+    LanguageType eLang = LanguageTag::convertToLanguageType(aLocale);
+    if (!xSpell.is() || !xSpell->hasLanguage(static_cast<sal_uInt16>(eLang)))
+        return; // no spell checker, or it does not support hu-HU: nothing to test
+
+    uno::Sequence<beans::PropertyValue> aProperties;
+
+    // word written with the typographic apostrophe (U+2019): must be accepted
+    OUString sWord(u"d’Arc"_ustr);
+    CPPUNIT_ASSERT(xSpell->isValid(sWord, static_cast<sal_uInt16>(eLang), 
aProperties));
+
+    // same word written with the ASCII apostrophe (U+0027): must be rejected
+    OUString sWord2(u"d'Arc"_ustr);
+    CPPUNIT_ASSERT(!xSpell->isValid(sWord2, static_cast<sal_uInt16>(eLang), 
aProperties));
+}
+
 CPPUNIT_TEST_FIXTURE(SwLayoutWriter2, testRedlineNumberInFootnote)
 {
     createSwDoc("tdf85610.fodt");

Reply via email to