Currently pgagent doesn't handle Unicode correctly. The CharToWString function corrupts multibyte characters because it processes the string one byte at a time: 148 std::string s = std::string(cstr); 149 std::wstring wsTmp(s.begin(), s.end());
The WStringToChar function does not take into account that wcstombs can produce _multi_byte characters on output, and it creates a buffer with size = wcslen: 157 int wstr_length = wcslen(wchar_str); 158 char *dst = new char[wstr_length + 10]; Also, pgagent does not set up the locale with setlocale(); without it, none of the wcs/mbs functions can handle multibyte strings. For example: === step code === select 'это проверка кириллицы в теле запроса pgagent' ================= === postgres log === 2021-02-05 23:19:05 UTC [15600-1] postgres@postgres ERROR: unterminated quoted string at or near "'" at character 8 2021-02-05 23:19:05 UTC [15600-2] postgres@postgres STATEMENT: select ' ==================== Please see the attached patch. I only tested it on GNU/Linux and can't test it on Windows, sorry. -- Sergey Burladyan
commit b9cf098a4d0df53b7b623a0de844fce834bf7be1 (HEAD -> x5) Author: Sergey Burladyan <eshkin...@gmail.com> Date: Sat Feb 6 06:16:59 2021 Fix multibyte strings handling diff --git a/misc.cpp b/misc.cpp index 35ac83d..17103c7 100644 --- a/misc.cpp +++ b/misc.cpp @@ -145,20 +145,49 @@ std::wstring NumToStr(const long l) // This function is used to convert char* to std::wstring. std::wstring CharToWString(const char* cstr) { - std::string s = std::string(cstr); - std::wstring wsTmp(s.begin(), s.end()); - return wsTmp; + size_t wc_cnt = mbstowcs(NULL, cstr, 0); + + if (wc_cnt == (size_t) -1) { + return std::wstring(); + } + + wchar_t *wcs = new wchar_t[wc_cnt + 1]; + if (wcs == NULL) { + return std::wstring(); + } + + if (mbstowcs(wcs, cstr, wc_cnt + 1) == (size_t) -1) { + return std::wstring(); + } + + std::wstring tmp(&wcs[0], &wcs[wc_cnt]); + delete [] wcs; + + return tmp; } // This function is used to convert std::wstring to char *. char * WStringToChar(const std::wstring &wstr) { + static char *err = (char*)""; const wchar_t *wchar_str = wstr.c_str(); - int wstr_length = wcslen(wchar_str); - char *dst = new char[wstr_length + 10]; - memset(dst, 0x00, (wstr_length + 10)); - wcstombs(dst, wchar_str, wstr_length); - return dst; + int mb_len = wcstombs(NULL, wchar_str, 0); + + if (mb_len == (size_t) -1) { + return err; + } + + char *mbs = new char[mb_len + 1]; + if (mbs == NULL) { + return err; + } + memset(mbs, 0, mb_len + 1); + + if (wcstombs(mbs, wchar_str, mb_len + 1) == (size_t) -1) { + return err; + } + + return mbs; } // Below function will generate random string of given character. diff --git a/unix.cpp b/unix.cpp index 9a41e38..d4b0d3d 100644 --- a/unix.cpp +++ b/unix.cpp @@ -155,6 +155,8 @@ static void daemonize(void) int main(int argc, char **argv) { + setlocale(LC_ALL, ""); + std::wstring executable; executable.assign(CharToWString(argv[0]));