Currently pgagent doesn't handle Unicode correctly.

The CharToWString function corrupts multibyte characters because it
processes the string one byte at a time:
 148         std::string s = std::string(cstr);
 149         std::wstring wsTmp(s.begin(), s.end());

The WStringToChar function does not take into account that wcstombs can
produce _multi_byte characters on output, and it sizes the buffer from
wcslen (the number of wide characters, not the number of bytes):
 157         int wstr_length = wcslen(wchar_str);
 158         char *dst = new char[wstr_length + 10];

Also, pgagent does not set up the locale with setlocale(); without it, the
wcs/mbs functions cannot handle multibyte strings.

For example:

=== step code ===
select 'это проверка кириллицы в теле запроса pgagent'
=================

=== postgres log ===
2021-02-05 23:19:05 UTC [15600-1] postgres@postgres ERROR:  unterminated quoted string at or near "'" at character 8
2021-02-05 23:19:05 UTC [15600-2] postgres@postgres STATEMENT:  select '
====================

Please see the attached patch.
I have only tested it on GNU/Linux and can't test it on Windows, sorry.

-- 
Sergey Burladyan

commit b9cf098a4d0df53b7b623a0de844fce834bf7be1 (HEAD -> x5)
Author: Sergey Burladyan <eshkin...@gmail.com>
Date:   Sat Feb 6 06:16:59 2021

    Fix multibyte strings handling

diff --git a/misc.cpp b/misc.cpp
index 35ac83d..17103c7 100644
--- a/misc.cpp
+++ b/misc.cpp
@@ -145,20 +145,49 @@ std::wstring NumToStr(const long l)
 // This function is used to convert char* to std::wstring.
 std::wstring CharToWString(const char* cstr)
 {
-	std::string s = std::string(cstr);
-	std::wstring wsTmp(s.begin(), s.end());
-	return wsTmp;
+	size_t wc_cnt = mbstowcs(NULL, cstr, 0);
+
+	if (wc_cnt == (size_t) -1) {
+		return std::wstring();
+	}
+
+	wchar_t *wcs = new wchar_t[wc_cnt + 1];
+	if (wcs == NULL) {
+		return std::wstring();
+	}
+
+	if (mbstowcs(wcs, cstr, wc_cnt + 1) == (size_t) -1) {
+		return std::wstring();
+	}
+
+	std::wstring tmp(&wcs[0], &wcs[wc_cnt]);
+	delete [] wcs;
+
+	return tmp;
 }
 
 // This function is used to convert std::wstring to char *.
 char * WStringToChar(const std::wstring &wstr)
 {
+	static char *err = (char*)"";
 	const wchar_t *wchar_str = wstr.c_str();
-	int wstr_length = wcslen(wchar_str);
-	char *dst = new char[wstr_length + 10];
-	memset(dst, 0x00, (wstr_length + 10));
-	wcstombs(dst, wchar_str, wstr_length);
-	return dst;
+	int mb_len = wcstombs(NULL, wchar_str, 0);
+
+	if (mb_len == (size_t) -1) {
+		return err;
+	}
+
+	char *mbs = new char[mb_len + 1];
+	if (mbs == NULL) {
+		return err;
+	}
+	memset(mbs, 0, mb_len + 1);
+
+	if (wcstombs(mbs, wchar_str, mb_len + 1) == (size_t) -1) {
+		return err;
+	}
+
+	return mbs;
 }
 
 // Below function will generate random string of given character.
diff --git a/unix.cpp b/unix.cpp
index 9a41e38..d4b0d3d 100644
--- a/unix.cpp
+++ b/unix.cpp
@@ -155,6 +155,8 @@ static void daemonize(void)
 
 int main(int argc, char **argv)
 {
+	setlocale(LC_ALL, "");
+
 	std::wstring executable;
 	executable.assign(CharToWString(argv[0]));
 

Reply via email to