lingucomponent/source/spellcheck/languagetool/languagetoolimp.cxx | 18 +++++----- 1 file changed, 9 insertions(+), 9 deletions(-)
New commits: commit a57c8fdeb21f4ee77763833a3c92ea337a649cda Author: Mike Kaganski <mike.kagan...@collabora.com> AuthorDate: Thu Nov 23 13:21:02 2023 +0100 Commit: Xisco Fauli <xiscofa...@libreoffice.org> CommitDate: Fri Nov 24 16:29:48 2023 +0100 tdf#158117: encode the username sent to LanguageTool Change-Id: I762ebb7487b3bcb80cd88b265b4e75c0d8b7c639 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/159764 Tested-by: Jenkins Reviewed-by: Mike Kaganski <mike.kagan...@collabora.com> Signed-off-by: Xisco Fauli <xiscofa...@libreoffice.org> Reviewed-on: https://gerrit.libreoffice.org/c/core/+/159910 diff --git a/lingucomponent/source/spellcheck/languagetool/languagetoolimp.cxx b/lingucomponent/source/spellcheck/languagetool/languagetoolimp.cxx index 7e61e7a8cca9..63c8307aadf9 100644 --- a/lingucomponent/source/spellcheck/languagetool/languagetoolimp.cxx +++ b/lingucomponent/source/spellcheck/languagetool/languagetoolimp.cxx @@ -84,15 +84,16 @@ PropertyValue lcl_GetLineColorPropertyFromErrorId(const std::string& rErrorId) return comphelper::makePropertyValue("LineColor", aColor); } -OString encodeTextForLanguageTool(const OUString& text) +OString encodeTextForLT(const OUString& text) { // Let's be a bit conservative. I don't find a good description what needs encoding (and in // which way) at https://languagetool.org/http-api/; the "Try it out!" function shows that // different cases are handled differently by the demo; some percent-encode the UTF-8 // representation, like %D0%90 (for cyrillic А); some turn into entities like &#33; (for - // exclamation mark !); some other to things like \u0027 (for apostrophe '). + // exclamation mark !); some other to things like \u0027 (for apostrophe '). So only keep + // RFC 3986's "Unreserved Characters" set unencoded, use UTF-8 percent-encoding for the rest. 
static constexpr auto myCharClass = rtl::createUriCharClass( - u8"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"); + u8"-._~0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"); return OUStringToOString( rtl::Uri::encode(text, myCharClass.data(), rtl_UriEncodeStrict, RTL_TEXTENCODING_UTF8), RTL_TEXTENCODING_ASCII_US); @@ -202,10 +203,9 @@ std::string makeHttpRequest(std::u16string_view aURL, HTTP_METHOD method, const { OString apiKey = OUStringToOString(LanguageToolCfg::ApiKey::get().value_or(""), RTL_TEXTENCODING_UTF8); - OString username = OUStringToOString(LanguageToolCfg::Username::get().value_or(""), - RTL_TEXTENCODING_UTF8); + OUString username = LanguageToolCfg::Username::get().value_or(""); if (!apiKey.isEmpty() && !username.isEmpty()) - realPostData += "&username=" + username + "&apiKey=" + apiKey; + realPostData += "&username=" + encodeTextForLT(username) + "&apiKey=" + apiKey; } return makeHttpRequest_impl(aURL, method, realPostData, nullptr, nStatusCode); @@ -460,7 +460,7 @@ ProofreadingResult SAL_CALL LanguageToolGrammarChecker::doProofreading( } else { - postData = "text=" + encodeTextForLanguageTool(aText) + "&language=" + langTag; + postData = "text=" + encodeTextForLT(aText) + "&language=" + langTag; } if (auto cachedResult = mCachedResults.find(postData); cachedResult != mCachedResults.end()) commit 6b4bcad9da516ee11f52f96452ba83ba5234b1b1 Author: Gökay Şatır <gokaysa...@collabora.com> AuthorDate: Mon Oct 16 12:26:10 2023 +0300 Commit: Xisco Fauli <xiscofa...@libreoffice.org> CommitDate: Fri Nov 24 16:29:42 2023 +0100 Don't encode the text when sending it to Duden corrector. 
Signed-off-by: Gökay Şatır <gokaysa...@collabora.com> Change-Id: I9e1a2adede04858e5c43b878786bbcc28922aa5f Reviewed-on: https://gerrit.libreoffice.org/c/core/+/158023 Tested-by: Jenkins CollaboraOffice <jenkinscollaboraoff...@gmail.com> (cherry picked from commit 8989cba47fce3763229005b1ed2fec74da7cfb72) Reviewed-on: https://gerrit.libreoffice.org/c/core/+/158333 Tested-by: Jenkins Reviewed-by: Caolán McNamara <caolan.mcnam...@collabora.com> Signed-off-by: Xisco Fauli <xiscofa...@libreoffice.org> Reviewed-on: https://gerrit.libreoffice.org/c/core/+/159889 diff --git a/lingucomponent/source/spellcheck/languagetool/languagetoolimp.cxx b/lingucomponent/source/spellcheck/languagetool/languagetoolimp.cxx index c5062b84fbe7..7e61e7a8cca9 100644 --- a/lingucomponent/source/spellcheck/languagetool/languagetoolimp.cxx +++ b/lingucomponent/source/spellcheck/languagetool/languagetoolimp.cxx @@ -444,14 +444,14 @@ ProofreadingResult SAL_CALL LanguageToolGrammarChecker::doProofreading( = std::min(xRes.nStartOfNextSentencePosition, aText.getLength()); OString langTag(LanguageTag::convertToBcp47(aLocale, false).toUtf8()); - OString postData = encodeTextForLanguageTool(aText); + OString postData; const bool bDudenProtocol = LanguageToolCfg::RestProtocol::get().value_or("") == "duden"; if (bDudenProtocol) { std::stringstream aStream; boost::property_tree::ptree aTree; aTree.put("text-language", langTag.getStr()); - aTree.put("text", postData.getStr()); + aTree.put("text", aText.toUtf8()); // We don't encode the text in Duden Corrector tool case. aTree.put("hyphenation", false); aTree.put("spellchecking-level", 3); aTree.put("correction-proposals", true); @@ -460,7 +460,7 @@ ProofreadingResult SAL_CALL LanguageToolGrammarChecker::doProofreading( } else { - postData = "text=" + postData + "&language=" + langTag; + postData = "text=" + encodeTextForLanguageTool(aText) + "&language=" + langTag; } if (auto cachedResult = mCachedResults.find(postData); cachedResult != mCachedResults.end())