Hiroshi Inoue wrote:
> Bruce Momjian wrote:
> > Hiroshi Inoue wrote:
> >> Bruce Momjian wrote:
> >>> Where are we on this issue?
> >> Oops I forgot it completely.
> >> I have a little improved version and would post it tonight.
> >
> > Ah, very good. Thanks.
>
> Attached is an improved version.
I spent many hours on this patch and am attaching an updated version. I have restructured the code and added many comments, but this is the main one:

     * Ideally, the server encoding and locale settings would
     * always match. Unfortunately, WIN32 does not support UTF-8
     * values for setlocale(), even though PostgreSQL runs fine with
     * a UTF-8 encoding on Windows:
     *
     * http://msdn.microsoft.com/en-us/library/x99tb11d.aspx
     *
     * Therefore, we must set LC_CTYPE to match LC_NUMERIC and
     * LC_MONETARY, call localeconv(), and use mbstowcs() to
     * convert the locale-aware string, e.g. Euro symbol, which
     * is not in UTF-8 to the server encoding.

I need someone with WIN32 experience to review and test this patch.

--
Bruce Momjian <br...@momjian.us> http://momjian.us
EnterpriseDB http://enterprisedb.com

PG East: http://www.enterprisedb.com/community/nav-pg-east-2010.do
Index: src/backend/utils/adt/pg_locale.c =================================================================== RCS file: /cvsroot/pgsql/src/backend/utils/adt/pg_locale.c,v retrieving revision 1.53 diff -c -c -r1.53 pg_locale.c *** src/backend/utils/adt/pg_locale.c 27 Feb 2010 20:20:44 -0000 1.53 --- src/backend/utils/adt/pg_locale.c 28 Feb 2010 03:59:14 -0000 *************** *** 4,10 **** * * Portions Copyright (c) 2002-2010, PostgreSQL Global Development Group * ! * $PostgreSQL: pgsql/src/backend/utils/adt/pg_locale.c,v 1.53 2010/02/27 20:20:44 momjian Exp $ * *----------------------------------------------------------------------- */ --- 4,10 ---- * * Portions Copyright (c) 2002-2010, PostgreSQL Global Development Group * ! * $PostgreSQL: pgsql/src/backend/utils/adt/pg_locale.c,v 1.51 2010/01/02 16:57:54 momjian Exp $ * *----------------------------------------------------------------------- */ *************** *** 386,391 **** --- 386,459 ---- free(s->positive_sign); } + #ifdef WIN32 + /* + * This converts the LC_CTYPE-encoded string returned from the + * locale routines to the database encoding. 
+ */ + static char *db_encoding_strdup(const char *item, const char *str) + { + int db_encoding = GetDatabaseEncoding(); + size_t wchars, ilen, wclen, dstlen; + int utflen, bytes_per_char; + wchar_t *wbuf; + char *dst; + + if (!str[0]) + return strdup(str); + ilen = strlen(str) + 1; + wclen = ilen * sizeof(wchar_t); + wbuf = (wchar_t *) palloc(wclen); + + /* Convert multi-byte string using current LC_CTYPE to a wide-character string */ + wchars = mbstowcs(wbuf, str, ilen); + if (wchars == (size_t) -1) + elog(ERROR, + "could not convert string to wide characters: error %lu", GetLastError()); + + /* allocate target string */ + bytes_per_char = pg_encoding_max_length(PG_UTF8); + if (pg_encoding_max_length(db_encoding) > bytes_per_char) + bytes_per_char = pg_encoding_max_length(db_encoding); + dstlen = wchars * bytes_per_char + 1; + if ((dst = malloc(dstlen)) == NULL) + elog(ERROR, "could not allocate a destination buffer"); + + /* Convert wide string to UTF8 */ + utflen = WideCharToMultiByte(CP_UTF8, 0, wbuf, wchars, dst, dstlen, NULL, NULL); + if (utflen == 0) + elog(ERROR, + "could not convert string %04x to UTF-8: error %lu", wbuf[0], GetLastError()); + pfree(wbuf); + + dst[utflen] = '\0'; + if (db_encoding != PG_UTF8) + { + PG_TRY(); + { + char *convstr = pg_do_encoding_conversion(dst, utflen, PG_UTF8, db_encoding); + if (dst != convstr) + { + strlcpy(dst, convstr, dstlen); + pfree(convstr); + } + } + PG_CATCH(); + { + FlushErrorState(); + dst[0] = '\0'; + } + PG_END_TRY(); + } + + return dst; + } + #else + static char *db_encoding_strdup(const char *item, const char *str) + { + return strdup(str); + } + #endif /* WIN32 */ /* * Return the POSIX lconv struct (contains number/money formatting *************** *** 398,403 **** --- 466,475 ---- struct lconv *extlconv; char *save_lc_monetary; char *save_lc_numeric; + #ifdef WIN32 + char *save_lc_ctype = NULL; + bool lc_ctype_was_null = false; + #endif /* Did we do it already? 
*/ if (CurrentLocaleConvValid) *************** *** 413,442 **** if (save_lc_numeric) save_lc_numeric = pstrdup(save_lc_numeric); setlocale(LC_MONETARY, locale_monetary); setlocale(LC_NUMERIC, locale_numeric); ! ! /* Get formatting information */ extlconv = localeconv(); /* ! * Must copy all values since restoring internal settings may overwrite * localeconv()'s results. */ CurrentLocaleConv = *extlconv; ! CurrentLocaleConv.currency_symbol = strdup(extlconv->currency_symbol); ! CurrentLocaleConv.decimal_point = strdup(extlconv->decimal_point); ! CurrentLocaleConv.grouping = strdup(extlconv->grouping); ! CurrentLocaleConv.thousands_sep = strdup(extlconv->thousands_sep); ! CurrentLocaleConv.int_curr_symbol = strdup(extlconv->int_curr_symbol); ! CurrentLocaleConv.mon_decimal_point = strdup(extlconv->mon_decimal_point); CurrentLocaleConv.mon_grouping = strdup(extlconv->mon_grouping); ! CurrentLocaleConv.mon_thousands_sep = strdup(extlconv->mon_thousands_sep); ! CurrentLocaleConv.negative_sign = strdup(extlconv->negative_sign); ! CurrentLocaleConv.positive_sign = strdup(extlconv->positive_sign); CurrentLocaleConv.n_sign_posn = extlconv->n_sign_posn; ! /* Try to restore internal settings */ if (save_lc_monetary) { setlocale(LC_MONETARY, save_lc_monetary); --- 485,564 ---- if (save_lc_numeric) save_lc_numeric = pstrdup(save_lc_numeric); + #ifdef WIN32 + /* + * Ideally, the server encoding and locale settings would + * always match. Unfortunately, WIN32 does not support UTF-8 + * values for setlocale(), even though PostgreSQL runs fine with + * a UTF-8 encoding on Windows: + * + * http://msdn.microsoft.com/en-us/library/x99tb11d.aspx + * + * Therefore, we must set LC_CTYPE to match LC_NUMERIC and + * LC_MONETARY, call localeconv(), and use mbstowcs() to + * convert the locale-aware string, e.g. Euro symbol, which + * is not in UTF-8 to the server encoding. + */ + + /* + * We unconditionally restore LC_CTYPE because we are setting it + * to an unusual value. 
+ */ + if ((save_lc_ctype = setlocale(LC_CTYPE, NULL)) != NULL) + save_lc_ctype = pstrdup(save_lc_ctype); + else + /* This is actually the C locale */ + save_lc_ctype = pstrdup(""); + + /* Set LC_CTYPE to match LC_MONETARY? */ + if (pg_strcasecmp(save_lc_ctype, locale_monetary) != 0) + setlocale(LC_CTYPE, locale_monetary); + #endif + setlocale(LC_MONETARY, locale_monetary); setlocale(LC_NUMERIC, locale_numeric); ! /* ! * Get formatting information for LC_MONETARY, and LC_NUMERIC if they ! * are the same. ! */ extlconv = localeconv(); /* ! * Must copy all values since restoring internal settings might overwrite * localeconv()'s results. */ CurrentLocaleConv = *extlconv; ! ! /* The first argument of db_encoding_strdup() is only used on WIN32 */ ! CurrentLocaleConv.currency_symbol = db_encoding_strdup("currency_symbol", extlconv->currency_symbol); ! CurrentLocaleConv.int_curr_symbol = db_encoding_strdup("int_curr_symbol", extlconv->int_curr_symbol); ! CurrentLocaleConv.mon_decimal_point = db_encoding_strdup("mon_decimal_point", extlconv->mon_decimal_point); CurrentLocaleConv.mon_grouping = strdup(extlconv->mon_grouping); ! CurrentLocaleConv.mon_thousands_sep = db_encoding_strdup("mon_thousands_sep", extlconv->mon_thousands_sep); ! CurrentLocaleConv.negative_sign = db_encoding_strdup("negative_sign", extlconv->negative_sign); ! CurrentLocaleConv.positive_sign = db_encoding_strdup("positive_sign", extlconv->positive_sign); CurrentLocaleConv.n_sign_posn = extlconv->n_sign_posn; ! #ifdef WIN32 ! if (pg_strcasecmp(locale_numeric, locale_monetary) != 0) ! { ! setlocale(LC_CTYPE, locale_numeric); ! /* Get formatting information for LC_NUMERIC with matching LC_CTYPE */ ! extlconv = localeconv(); ! } ! #endif ! ! CurrentLocaleConv.decimal_point = db_encoding_strdup("decimal_point", extlconv->decimal_point); ! CurrentLocaleConv.grouping = strdup(extlconv->grouping); ! CurrentLocaleConv.thousands_sep = db_encoding_strdup("thousands_sep", extlconv->thousands_sep); ! ! /* ! 
* Restore internal settings ! */ ! #ifdef WIN32 ! setlocale(LC_CTYPE, save_lc_ctype); ! pfree(save_lc_ctype); ! #endif if (save_lc_monetary) { setlocale(LC_MONETARY, save_lc_monetary); *************** *** 533,542 **** elog(DEBUG3, "cache_locale_time() executed; locale: \"%s\"", locale_time); #ifdef WIN32 ! /* set user's value of ctype locale */ save_lc_ctype = setlocale(LC_CTYPE, NULL); if (save_lc_ctype) save_lc_ctype = pstrdup(save_lc_ctype); setlocale(LC_CTYPE, locale_time); #endif --- 655,666 ---- elog(DEBUG3, "cache_locale_time() executed; locale: \"%s\"", locale_time); #ifdef WIN32 ! /* See the WIN32 comment near the top of PGLC_localeconv() */ save_lc_ctype = setlocale(LC_CTYPE, NULL); if (save_lc_ctype) save_lc_ctype = pstrdup(save_lc_ctype); + else + save_lc_ctype = pstrdup(""); setlocale(LC_CTYPE, locale_time); #endif
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers