[Libreoffice-commits] core.git: Branch 'libreoffice-7-6' - 2 commits - lingucomponent/source

Mike Kaganski (via logerrit) Fri, 24 Nov 2023 07:30:08 -0800

 lingucomponent/source/spellcheck/languagetool/languagetoolimp.cxx |   18 +++++-----
 1 file changed, 9 insertions(+), 9 deletions(-)


New commits:
commit a57c8fdeb21f4ee77763833a3c92ea337a649cda
Author:     Mike Kaganski <mike.kagan...@collabora.com>
AuthorDate: Thu Nov 23 13:21:02 2023 +0100
Commit:     Xisco Fauli <xiscofa...@libreoffice.org>
CommitDate: Fri Nov 24 16:29:48 2023 +0100

    tdf#158117: encode the username sent to LanguageTool
    
    Change-Id: I762ebb7487b3bcb80cd88b265b4e75c0d8b7c639
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/159764
    Tested-by: Jenkins
    Reviewed-by: Mike Kaganski <mike.kagan...@collabora.com>
    Signed-off-by: Xisco Fauli <xiscofa...@libreoffice.org>
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/159910

diff --git a/lingucomponent/source/spellcheck/languagetool/languagetoolimp.cxx 
b/lingucomponent/source/spellcheck/languagetool/languagetoolimp.cxx
index 7e61e7a8cca9..63c8307aadf9 100644
--- a/lingucomponent/source/spellcheck/languagetool/languagetoolimp.cxx
+++ b/lingucomponent/source/spellcheck/languagetool/languagetoolimp.cxx
@@ -84,15 +84,16 @@ PropertyValue lcl_GetLineColorPropertyFromErrorId(const 
std::string& rErrorId)
     return comphelper::makePropertyValue("LineColor", aColor);
 }
 
-OString encodeTextForLanguageTool(const OUString& text)
+OString encodeTextForLT(const OUString& text)
 {
     // Let's be a bit conservative. I don't find a good description what needs 
encoding (and in
     // which way) at https://languagetool.org/http-api/; the "Try it out!" 
function shows that
     // different cases are handled differently by the demo; some 
percent-encode the UTF-8
     // representation, like %D0%90 (for cyrillic А); some turn into entities 
like &#33; (for
-    // exclamation mark !); some other to things like \u0027 (for apostrophe 
').
+    // exclamation mark !); some other to things like \u0027 (for apostrophe 
'). So only keep
+    // RFC 3986's "Unreserved Characters" set unencoded, use UTF-8 
percent-encoding for the rest.
     static constexpr auto myCharClass = rtl::createUriCharClass(
-        u8"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz");
+        
u8"-._~0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz");
     return OUStringToOString(
         rtl::Uri::encode(text, myCharClass.data(), rtl_UriEncodeStrict, 
RTL_TEXTENCODING_UTF8),
         RTL_TEXTENCODING_ASCII_US);
@@ -202,10 +203,9 @@ std::string makeHttpRequest(std::u16string_view aURL, 
HTTP_METHOD method, const
     {
         OString apiKey
             = OUStringToOString(LanguageToolCfg::ApiKey::get().value_or(""), 
RTL_TEXTENCODING_UTF8);
-        OString username = 
OUStringToOString(LanguageToolCfg::Username::get().value_or(""),
-                                             RTL_TEXTENCODING_UTF8);
+        OUString username = LanguageToolCfg::Username::get().value_or("");
         if (!apiKey.isEmpty() && !username.isEmpty())
-            realPostData += "&username=" + username + "&apiKey=" + apiKey;
+            realPostData += "&username=" + encodeTextForLT(username) + 
"&apiKey=" + apiKey;
     }
 
     return makeHttpRequest_impl(aURL, method, realPostData, nullptr, 
nStatusCode);
@@ -460,7 +460,7 @@ ProofreadingResult SAL_CALL 
LanguageToolGrammarChecker::doProofreading(
     }
     else
     {
-        postData = "text=" + encodeTextForLanguageTool(aText) + "&language=" + 
langTag;
+        postData = "text=" + encodeTextForLT(aText) + "&language=" + langTag;
     }
 
     if (auto cachedResult = mCachedResults.find(postData); cachedResult != 
mCachedResults.end())
commit 6b4bcad9da516ee11f52f96452ba83ba5234b1b1
Author:     Gökay Şatır <gokaysa...@collabora.com>
AuthorDate: Mon Oct 16 12:26:10 2023 +0300
Commit:     Xisco Fauli <xiscofa...@libreoffice.org>
CommitDate: Fri Nov 24 16:29:42 2023 +0100

    Don't encode the text when sending it to Duden corrector.
    
    Signed-off-by: Gökay Şatır <gokaysa...@collabora.com>
    Change-Id: I9e1a2adede04858e5c43b878786bbcc28922aa5f
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/158023
    Tested-by: Jenkins CollaboraOffice <jenkinscollaboraoff...@gmail.com>
    (cherry picked from commit 8989cba47fce3763229005b1ed2fec74da7cfb72)
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/158333
    Tested-by: Jenkins
    Reviewed-by: Caolán McNamara <caolan.mcnam...@collabora.com>
    Signed-off-by: Xisco Fauli <xiscofa...@libreoffice.org>
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/159889

diff --git a/lingucomponent/source/spellcheck/languagetool/languagetoolimp.cxx 
b/lingucomponent/source/spellcheck/languagetool/languagetoolimp.cxx
index c5062b84fbe7..7e61e7a8cca9 100644
--- a/lingucomponent/source/spellcheck/languagetool/languagetoolimp.cxx
+++ b/lingucomponent/source/spellcheck/languagetool/languagetoolimp.cxx
@@ -444,14 +444,14 @@ ProofreadingResult SAL_CALL 
LanguageToolGrammarChecker::doProofreading(
         = std::min(xRes.nStartOfNextSentencePosition, aText.getLength());
 
     OString langTag(LanguageTag::convertToBcp47(aLocale, false).toUtf8());
-    OString postData = encodeTextForLanguageTool(aText);
+    OString postData;
     const bool bDudenProtocol = 
LanguageToolCfg::RestProtocol::get().value_or("") == "duden";
     if (bDudenProtocol)
     {
         std::stringstream aStream;
         boost::property_tree::ptree aTree;
         aTree.put("text-language", langTag.getStr());
-        aTree.put("text", postData.getStr());
+        aTree.put("text", aText.toUtf8()); // We don't encode the text in 
Duden Corrector tool case.
         aTree.put("hyphenation", false);
         aTree.put("spellchecking-level", 3);
         aTree.put("correction-proposals", true);
@@ -460,7 +460,7 @@ ProofreadingResult SAL_CALL 
LanguageToolGrammarChecker::doProofreading(
     }
     else
     {
-        postData = "text=" + postData + "&language=" + langTag;
+        postData = "text=" + encodeTextForLanguageTool(aText) + "&language=" + 
langTag;
     }
 
     if (auto cachedResult = mCachedResults.find(postData); cachedResult != 
mCachedResults.end())

[Libreoffice-commits] core.git: Branch 'libreoffice-7-6' - 2 commits - lingucomponent/source

Reply via email to