Hiroshi Inoue wrote: > >>>> I need someone with WIN32 experience to review and test this patch. > >>> I don't understand why cache_locale_time() works on Windows. It sets > >>> the LC_CTYPE but does not do any encoding conversion. > >> Doesn't strftime_win32 do the conversion? > > > > Oh, I now see strftime is redefined as a macro in that C file. Thanks. > > > >>> Do month and > >>> day-of-week names not work either, or do they work and the encoding > >>> conversion for numeric/money, e.g. Euro, is not necessary? > >> db_strdup does the conversion. > > > > Should we pull the encoding conversion into a separate function and have > > strftime_win32() and db_strdup() both call it? > > We may be able to pull the conversion WideChars => UTF8 => > a PG encoding into a function.
OK, I have created a new function, win32_wchar_to_db_encoding(), to share the conversion from wide characters to the database encoding. New patch attached. > BTW both PGLC_localeconv() and cache_locale_time() save the current > LC_CTYPE first and restore it just before returning from the functions. > I'm not sure that is OK when errors occur in the middle of the functions. Yes, I added a comment questioning whether that is a problem. -- Bruce Momjian <br...@momjian.us> http://momjian.us EnterpriseDB http://enterprisedb.com PG East: http://www.enterprisedb.com/community/nav-pg-east-2010.do
Index: src/backend/utils/adt/pg_locale.c =================================================================== RCS file: /cvsroot/pgsql/src/backend/utils/adt/pg_locale.c,v retrieving revision 1.53 diff -c -c -r1.53 pg_locale.c *** src/backend/utils/adt/pg_locale.c 27 Feb 2010 20:20:44 -0000 1.53 --- src/backend/utils/adt/pg_locale.c 2 Mar 2010 18:11:41 -0000 *************** *** 4,10 **** * * Portions Copyright (c) 2002-2010, PostgreSQL Global Development Group * ! * $PostgreSQL: pgsql/src/backend/utils/adt/pg_locale.c,v 1.53 2010/02/27 20:20:44 momjian Exp $ * *----------------------------------------------------------------------- */ --- 4,10 ---- * * Portions Copyright (c) 2002-2010, PostgreSQL Global Development Group * ! * $PostgreSQL: pgsql/src/backend/utils/adt/pg_locale.c,v 1.51 2010/01/02 16:57:54 momjian Exp $ * *----------------------------------------------------------------------- */ *************** *** 96,101 **** --- 96,109 ---- static char *IsoLocaleName(const char *); /* MSVC specific */ #endif + #ifdef WIN32 + static size_t win32_wchar_to_db_encoding(const wchar_t *wbuf, + const size_t wchars, char *dst, size_t dstlen); + static char *db_encoding_strdup(const char *item, const char *str); + static size_t strftime_win32(char *dst, size_t dstlen, const wchar_t *format, + const struct tm *tm); + #endif + /* * pg_perm_setlocale *************** *** 387,392 **** --- 395,488 ---- } + #ifdef WIN32 + /* + * Convert wide character string (UTF16 on Win32) to UTF8, and then + * optionally to the db encoding. + */ + static size_t win32_wchar_to_db_encoding(const wchar_t *wbuf, + const size_t wchars, char *dst, size_t dstlen) + { + int db_encoding = GetDatabaseEncoding(); + int utf8len; + + /* Convert wide string (UTF16) to UTF8 */ + utf8len = WideCharToMultiByte(CP_UTF8, 0, wbuf, wchars, dst, dstlen, NULL, NULL); + if (utf8len == 0) + /* Does this leave LC_CTYPE set incorrectly? 
*/ + elog(ERROR, + "could not convert string %04x to UTF-8: error %lu", wbuf[0], GetLastError()); + pfree(wbuf); + + dst[utf8len] = '\0'; + if (db_encoding != PG_UTF8) + { + PG_TRY(); + { + char *convstr = pg_do_encoding_conversion(dst, utf8len, PG_UTF8, db_encoding); + if (dst != convstr) + { + strlcpy(dst, convstr, dstlen); + pfree(convstr); + } + } + PG_CATCH(); + { + FlushErrorState(); + dst[0] = '\0'; + } + PG_END_TRY(); + } + + return pg_mbstrlen(dst); + } + + /* + * This converts the LC_CTYPE-encoded string returned from the + * locale routines to the database encoding. + */ + static char *db_encoding_strdup(const char *item, const char *str) + { + int db_encoding = GetDatabaseEncoding(); + size_t wchars, ilen, wclen, dstlen; + int bytes_per_char; + wchar_t *wbuf; + char *dst; + + if (!str[0]) + return strdup(str); + + /* allocate wide character string */ + ilen = strlen(str) + 1; + wclen = ilen * sizeof(wchar_t); + wbuf = (wchar_t *) palloc(wclen); + + /* Convert multi-byte string using current LC_CTYPE to a wide-character string */ + wchars = mbstowcs(wbuf, str, ilen); + if (wchars == (size_t) -1) + elog(ERROR, + "could not convert string to wide characters: error %lu", GetLastError()); + + /* allocate target string */ + bytes_per_char = pg_encoding_max_length(PG_UTF8); + if (pg_encoding_max_length(db_encoding) > bytes_per_char) + bytes_per_char = pg_encoding_max_length(db_encoding); + dstlen = wchars * bytes_per_char + 1; + if ((dst = malloc(dstlen)) == NULL) + elog(ERROR, "could not allocate a destination buffer"); + + /* Convert wide string (UTF16) to db encoding */ + win32_wchar_to_db_encoding(wbuf, wchars, dst, dstlen); + + return dst; + } + #else + static char *db_encoding_strdup(const char *item, const char *str) + { + return strdup(str); + } + #endif /* WIN32 */ + /* * Return the POSIX lconv struct (contains number/money formatting * information) with locale information for all categories. 
*************** *** 398,403 **** --- 494,502 ---- struct lconv *extlconv; char *save_lc_monetary; char *save_lc_numeric; + #ifdef WIN32 + char *save_lc_ctype = NULL; + #endif /* Did we do it already? */ if (CurrentLocaleConvValid) *************** *** 413,442 **** if (save_lc_numeric) save_lc_numeric = pstrdup(save_lc_numeric); setlocale(LC_MONETARY, locale_monetary); setlocale(LC_NUMERIC, locale_numeric); ! ! /* Get formatting information */ extlconv = localeconv(); /* ! * Must copy all values since restoring internal settings may overwrite * localeconv()'s results. */ CurrentLocaleConv = *extlconv; ! CurrentLocaleConv.currency_symbol = strdup(extlconv->currency_symbol); ! CurrentLocaleConv.decimal_point = strdup(extlconv->decimal_point); ! CurrentLocaleConv.grouping = strdup(extlconv->grouping); ! CurrentLocaleConv.thousands_sep = strdup(extlconv->thousands_sep); ! CurrentLocaleConv.int_curr_symbol = strdup(extlconv->int_curr_symbol); ! CurrentLocaleConv.mon_decimal_point = strdup(extlconv->mon_decimal_point); CurrentLocaleConv.mon_grouping = strdup(extlconv->mon_grouping); ! CurrentLocaleConv.mon_thousands_sep = strdup(extlconv->mon_thousands_sep); ! CurrentLocaleConv.negative_sign = strdup(extlconv->negative_sign); ! CurrentLocaleConv.positive_sign = strdup(extlconv->positive_sign); CurrentLocaleConv.n_sign_posn = extlconv->n_sign_posn; ! /* Try to restore internal settings */ if (save_lc_monetary) { setlocale(LC_MONETARY, save_lc_monetary); --- 512,588 ---- if (save_lc_numeric) save_lc_numeric = pstrdup(save_lc_numeric); + #ifdef WIN32 + /* + * Ideally, the db server encoding and locale settings would + * always match. 
Unfortunately, WIN32 does not support UTF-8 + * values for setlocale(), even though PostgreSQL runs fine with + * a UTF-8 encoding on Windows: + * + * http://msdn.microsoft.com/en-us/library/x99tb11d.aspx + * + * Therefore, we must set LC_CTYPE to match LC_NUMERIC and + * LC_MONETARY, call localeconv(), and use mbstowcs() to + * convert the locale-aware string, e.g. Euro symbol, which + * is not in UTF-8 to the server encoding. + */ + + if ((save_lc_ctype = setlocale(LC_CTYPE, NULL)) != NULL) + { + save_lc_ctype = pstrdup(save_lc_ctype); + /* Set LC_CTYPE to match LC_MONETARY? */ + if (pg_strcasecmp(save_lc_ctype, locale_monetary) != 0) + setlocale(LC_CTYPE, locale_monetary); + } + #endif + setlocale(LC_MONETARY, locale_monetary); setlocale(LC_NUMERIC, locale_numeric); ! /* ! * Get formatting information for LC_MONETARY, and LC_NUMERIC if they ! * are the same. ! */ extlconv = localeconv(); /* ! * Must copy all values since restoring internal settings might overwrite * localeconv()'s results. */ CurrentLocaleConv = *extlconv; ! ! /* The first argument of db_encoding_strdup() is only used on WIN32 */ ! CurrentLocaleConv.currency_symbol = db_encoding_strdup("currency_symbol", extlconv->currency_symbol); ! CurrentLocaleConv.int_curr_symbol = db_encoding_strdup("int_curr_symbol", extlconv->int_curr_symbol); ! CurrentLocaleConv.mon_decimal_point = db_encoding_strdup("mon_decimal_point", extlconv->mon_decimal_point); CurrentLocaleConv.mon_grouping = strdup(extlconv->mon_grouping); ! CurrentLocaleConv.mon_thousands_sep = db_encoding_strdup("mon_thousands_sep", extlconv->mon_thousands_sep); ! CurrentLocaleConv.negative_sign = db_encoding_strdup("negative_sign", extlconv->negative_sign); ! CurrentLocaleConv.positive_sign = db_encoding_strdup("positive_sign", extlconv->positive_sign); CurrentLocaleConv.n_sign_posn = extlconv->n_sign_posn; ! #ifdef WIN32 ! if (save_lc_ctype && pg_strcasecmp(locale_numeric, locale_monetary) != 0) ! { ! setlocale(LC_CTYPE, locale_numeric); ! 
/* Get formatting information for LC_NUMERIC with matching LC_CTYPE */ ! extlconv = localeconv(); ! } ! #endif ! ! CurrentLocaleConv.decimal_point = db_encoding_strdup("decimal_point", extlconv->decimal_point); ! CurrentLocaleConv.grouping = strdup(extlconv->grouping); ! CurrentLocaleConv.thousands_sep = db_encoding_strdup("thousands_sep", extlconv->thousands_sep); ! ! /* ! * Restore internal settings ! */ ! #ifdef WIN32 ! if (save_lc_ctype) ! { ! setlocale(LC_CTYPE, save_lc_ctype); ! pfree(save_lc_ctype); ! } ! #endif if (save_lc_monetary) { setlocale(LC_MONETARY, save_lc_monetary); *************** *** 455,483 **** #ifdef WIN32 /* ! * On win32, strftime() returns the encoding in CP_ACP, which is likely ! * different from SERVER_ENCODING. This is especially important in Japanese ! * versions of Windows which will use SJIS encoding, which we don't support ! * as a server encoding. ! * ! * Replace strftime() with a version that gets the string in UTF16 and then ! * converts it to the appropriate encoding as necessary. * * Note that this only affects the calls to strftime() in this file, which are * used to get the locale-aware strings. Other parts of the backend use * pg_strftime(), which isn't locale-aware and does not need to be replaced. */ static size_t ! strftime_win32(char *dst, size_t dstlen, const wchar_t *format, const struct tm * tm) { ! size_t len; wchar_t wbuf[MAX_L10N_DATA]; - int encoding; ! encoding = GetDatabaseEncoding(); ! ! len = wcsftime(wbuf, MAX_L10N_DATA, format, tm); ! if (len == 0) /* * strftime call failed - return 0 with the contents of dst --- 601,628 ---- #ifdef WIN32 /* ! * On WIN32, strftime() returns the encoding in CP_ACP (the default ! * operating system codpage for that computer), which is likely different ! * from SERVER_ENCODING. This is especially important in Japanese versions ! * of Windows which will use SJIS encoding, which we don't support as a ! * server encoding. ! * ! 
* So, instead of using strftime(), use wcsftime() to return the value in ! * wide characters (internally UTF16) and then convert it to the appropriate ! * database encoding. * * Note that this only affects the calls to strftime() in this file, which are * used to get the locale-aware strings. Other parts of the backend use * pg_strftime(), which isn't locale-aware and does not need to be replaced. */ static size_t ! strftime_win32(char *dst, size_t dstlen, const wchar_t *format, const struct tm *tm) { ! size_t wchars; wchar_t wbuf[MAX_L10N_DATA]; ! wchars = wcsftime(wbuf, MAX_L10N_DATA, format, tm); ! if (wchars == 0) /* * strftime call failed - return 0 with the contents of dst *************** *** 485,511 **** */ return 0; ! len = WideCharToMultiByte(CP_UTF8, 0, wbuf, len, dst, dstlen, NULL, NULL); ! if (len == 0) ! elog(ERROR, ! "could not convert string to UTF-8:error %lu", GetLastError()); ! ! dst[len] = '\0'; ! if (encoding != PG_UTF8) ! { ! char *convstr = pg_do_encoding_conversion(dst, len, PG_UTF8, encoding); ! ! if (dst != convstr) ! { ! strlcpy(dst, convstr, dstlen); ! len = strlen(dst); ! } ! } ! ! return len; } #define strftime(a,b,c,d) strftime_win32(a,b,L##c,d) #endif /* WIN32 */ --- 630,641 ---- */ return 0; ! return win32_wchar_to_db_encoding(wbuf, wchars, dst, dstlen); } + /* redefine strftime() */ #define strftime(a,b,c,d) strftime_win32(a,b,L##c,d) + #endif /* WIN32 */ *************** *** 533,542 **** elog(DEBUG3, "cache_locale_time() executed; locale: \"%s\"", locale_time); #ifdef WIN32 ! /* set user's value of ctype locale */ save_lc_ctype = setlocale(LC_CTYPE, NULL); if (save_lc_ctype) save_lc_ctype = pstrdup(save_lc_ctype); setlocale(LC_CTYPE, locale_time); #endif --- 663,674 ---- elog(DEBUG3, "cache_locale_time() executed; locale: \"%s\"", locale_time); #ifdef WIN32 ! 
/* See the WIN32 comment near the top of PGLC_localeconv() */ save_lc_ctype = setlocale(LC_CTYPE, NULL); if (save_lc_ctype) save_lc_ctype = pstrdup(save_lc_ctype); + else + save_lc_ctype = pstrdup(""); setlocale(LC_CTYPE, locale_time); #endif
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers