sw/qa/uitest/writer_tests4/spellDialog.py |   36 ++++++++++++++++++
 sw/source/core/txtnode/txtedt.cxx         |   59 +++++++++++++++++++++++++-----
 2 files changed, 87 insertions(+), 8 deletions(-)

New commits:
commit 71dc8a291ecc5bdae541fce41c8895d6b3302082
Author:     László Németh <nem...@numbertext.org>
AuthorDate: Tue Dec 13 11:06:38 2022 +0100
Commit:     László Németh <nem...@numbertext.org>
CommitDate: Tue Dec 13 14:42:54 2022 +0000

    tdf#45949 sw: add spell checking to hyperlinked text
    
    Recognize plain words within hyperlinks and
    check their spelling.
    
    Not only URLs and e-mail addresses, but also the plain
    text of hyperlinks was excluded from spell checking,
    which didn't conform to the specification:
    
    "URLs are skipped during spell checking
    
    Spelling of URLs in document doesn't make sense, because the words
    are not separated and often they aren't natural words at all.
    Since we can only decide what we know, we will only except URLs
    from spelling already recognized/formatted as URLs and if the URL
    and the text representation are equal."
    
    http://specs.openoffice.org/appwide/linguistic/Spellcheckdialog.sxw,
    see also https://bz.apache.org/ooo/show_bug.cgi?id=40133.
    
    Follow-up to commit ec8fdffec29de7c80da0c2a440e467c35a297119
    "tdf#152492 sw: fix unwanted spell checking of parts of URLs"
    and commit 2cca160f8bfc4597cf0ad3aaaf0017a5210ea0ec
    "tdf#126657, tdf#145104: Don’t set language to none on defined
    styles".
    
    Change-Id: If2698c54bcdee0de635abd324173c909d9161a02
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/144044
    Tested-by: László Németh <nem...@numbertext.org>
    Reviewed-by: László Németh <nem...@numbertext.org>

diff --git a/sw/qa/uitest/writer_tests4/spellDialog.py 
b/sw/qa/uitest/writer_tests4/spellDialog.py
index d4b19132c90c..183a85843ab9 100644
--- a/sw/qa/uitest/writer_tests4/spellDialog.py
+++ b/sw/qa/uitest/writer_tests4/spellDialog.py
@@ -169,4 +169,40 @@ frog, dogg, catt"""
             # This was "Baaed HTTP://www.baaad.org baaad baaad" (spelling URLs)
             self.assertEqual("Baaed http://www.baaad.org baaed baaad", 
output_text)
 
+    def test_tdf45949(self):
+        supported_locale = self.is_supported_locale("en", "US")
+        if not supported_locale:
+            self.skipTest("no dictionary support for en_US available")
+
+        with self.ui_test.create_doc_in_start_center("writer") as document:
+            cursor = document.getCurrentController().getViewCursor()
+            # Inserted text must be en_US, so make sure to set language in 
current location
+            cursor.CharLocale = Locale("en", "US", "")
+
+            xMainWindow = self.xUITest.getTopFocusWindow()
+            xEdit = xMainWindow.getChild("writer_edit")
+
+            # URL is recognized during typing
+            type_text(xEdit, "baaad http://www.baaad.org baaad")
+
+            # add spaces before and after the word "baaad" within the URL, so that it becomes a plain word inside the hyperlinked text
+            cursor.goLeft(10, False)
+            type_text(xEdit, " ")
+            cursor.goLeft(6, False)
+            type_text(xEdit, " ")
+
+            with 
self.ui_test.execute_modeless_dialog_through_command(".uno:SpellingAndGrammarDialog",
 close_button="close") as xDialog:
+                checkgrammar = xDialog.getChild('checkgrammar')
+                if get_state_as_dict(checkgrammar)['Selected'] == 'true':
+                    checkgrammar.executeAction('CLICK', ())
+                self.assertTrue(get_state_as_dict(checkgrammar)['Selected'] == 
'false')
+
+                change = xDialog.getChild('change')
+                change.executeAction("CLICK", ())
+                change.executeAction("CLICK", ())
+
+            output_text = document.Text.getString()
+            # This was "Baaed HTTP://www. baaad .org baaed" (skipped non-URL 
words of hypertext)
+            self.assertEqual("Baaed http://www. baaed .org baaad", output_text)
+
 # vim: set shiftwidth=4 softtabstop=4 expandtab:
diff --git a/sw/source/core/txtnode/txtedt.cxx 
b/sw/source/core/txtnode/txtedt.cxx
index b0e0b0d4b6a6..59950ce9e06f 100644
--- a/sw/source/core/txtnode/txtedt.cxx
+++ b/sw/source/core/txtnode/txtedt.cxx
@@ -112,6 +112,54 @@ static bool lcl_HasComments(const SwTextNode& rNode)
     return false;
 }
 
+// returns true if c is a possible delimiter character within URLs for word breaking (# $ % & + , - . / : ; = ? @ _)
+static bool lcl_IsDelim( const sal_Unicode c )
+{
+   return '#' == c || '$' == c || '%' == c || '&' == c || '+' == c ||
+          ',' == c || '-' == c || '.' == c || '/' == c || ':' == c ||
+          ';' == c || '=' == c || '?' == c || '@' == c || '_' == c;
+}
+
+// allow spell checking of normal text with hyperlink by recognizing (parts of) URLs: returns true if rWord (nLen characters starting at nBegin in the text of rNode) has a hyperlink attribute and looks like (a part of) a URL or an e-mail address
+static bool lcl_IsURL(std::u16string_view rWord,
+    SwTextNode &rNode, sal_Int32 nBegin, sal_Int32 nLen)
+{
+    // no hyperlink attribute at the beginning of the word: not a URL
+    if ( !rNode.GetTextAttrAt(nBegin, RES_TXTATR_INETFMT) )
+        return false;
+
+    // there is a dot in the word which is not a period, i.e. the first dot is not the last character ("example.org")
+    const size_t nPosAt = rWord.find('.');
+    if (nPosAt != std::u16string_view::npos && nPosAt < rWord.length() - 1)
+        return true;
+
+    // an e-mail address ("user@example")
+    if ( rWord.find('@') != std::u16string_view::npos )
+        return true;
+
+    const OUString& rText = rNode.GetText();
+
+    // scheme (e.g. "http" in "http://" or "mailto" in "mailto:address"):
+    // word is followed by 1) ':' + an alphanumeric character; 2) or ':' + a 
delimiter
+    if ( nBegin + nLen + 2 <= rText.getLength() && ':' == rText[nBegin + nLen] 
)
+    {
+         sal_Unicode c = rText[nBegin + nLen + 1];
+         if ( u_isalnum(c) || lcl_IsDelim(c) )
+             return true;
+    }
+
+    // path, query, fragment (e.g. "path" in "example.org/path"):
+    // word is preceded by 1) an alphanumeric character + a delimiter; 2) or 
two delimiters
+    if ( 2 <= nBegin && lcl_IsDelim(rText[nBegin - 1]) )
+    {
+        sal_Unicode c = rText[nBegin - 2];
+        if ( u_isalnum(c) || lcl_IsDelim(c) )
+            return true;
+    }
+
+    return false;
+}
+
 /*
  * This has basically the same function as SwScriptInfo::MaskHiddenRanges,
  * only for deleted redlines
@@ -992,15 +1040,13 @@ bool SwTextNode::Spell(SwSpellArgs* pArgs)
         {
             const OUString& rWord = aScanner.GetWord();
 
-            // skip URLs
-            bool bHyperlink = GetTextAttrAt(aScanner.GetBegin(), 
RES_TXTATR_INETFMT) ? true: false;
-
             // get next language for next word, consider language attributes
             // within the word
             LanguageType eActLang = aScanner.GetCurrentLanguage();
             DetectAndMarkMissingDictionaries( GetTextNode()->GetDoc(), 
pArgs->xSpeller, eActLang );
 
-            if( rWord.getLength() > 0 && LANGUAGE_NONE != eActLang && 
!bHyperlink )
+            if( rWord.getLength() > 0 && LANGUAGE_NONE != eActLang &&
+                !lcl_IsURL(rWord, *this, aScanner.GetBegin(), 
aScanner.GetLen() ) )
             {
                 if (pArgs->xSpeller.is())
                 {
@@ -1304,11 +1350,8 @@ SwRect SwTextFrame::AutoSpell_(SwTextNode & rNode, 
sal_Int32 nActPos)
             LanguageType eActLang = aScanner.GetCurrentLanguage();
             DetectAndMarkMissingDictionaries( rDoc, xSpell, eActLang );
 
-            // skip URLs
-            bool bHyperlink = pNode->GetTextAttrAt(nBegin, RES_TXTATR_INETFMT) 
? true: false;
-
             bool bSpell = xSpell.is() && xSpell->hasLanguage( 
static_cast<sal_uInt16>(eActLang) );
-            if( bSpell && !rWord.isEmpty() && !bHyperlink )
+            if( bSpell && !rWord.isEmpty() && !lcl_IsURL(rWord, *pNode, 
nBegin, nLen) )
             {
                 // check for: bAlter => xHyphWord.is()
                 OSL_ENSURE(!bSpell || xSpell.is(), "NULL pointer");

Reply via email to