On Tue, 10 Oct 2006, Sergiy Vyshnevetskiy wrote:
On Tue, 10 Oct 2006, Tom Lane wrote:
Sergiy Vyshnevetskiy <[EMAIL PROTECTED]> writes:
> > It's not magic, it's from ICU patch. Want me to send you a copy? :)
You're missing my point, which is that non-ICU locale support doesn't
necessarily recognize the same encoding names. We would have done this
years ago if we had a solution to that problem.
We should use IANA-standard names. If a name is not recognized, the call
simply fails and nothing changes. Anybody porting PostgreSQL to a new
platform can go over the list and make a patch for their port.
Here is a new and improved patch that closes the security hole as well. To
prevent a DoS attack, we lock LC_MESSAGES to C for any database encoding
that we are unable to bind to our textdomain.
--- src/backend/utils/mb/encnames.c.orig Mon Feb 13 00:32:56 2006
+++ src/backend/utils/mb/encnames.c Tue Oct 10 21:16:59 2006
@@ -375,6 +375,114 @@
}
};
+/*
+ * Maps most backend encodings to their preferred IANA charset name.
+ * mbutils.c indexes this table directly by encoding id and hands the name
+ * to bind_textdomain_codeset(), so entries must stay in encoding-id order.
+ * NOTE(review): confirm every encoding id has an entry here -- TODO verify.
+ * Palle Girgensohn, 2005
+ */
+
+pg_enc2name pg_enc2iananame_tbl[] =
+{
+ {
+ "US-ASCII", PG_SQL_ASCII
+ },
+ {
+ "EUC-JP", PG_EUC_JP
+ },
+ {
+ "GB2312", PG_EUC_CN
+ },
+ {
+ "EUC-KR", PG_EUC_KR
+ },
+ {
+ "ISO-2022-CN", PG_EUC_TW
+ },
+ {
+ "KS_C_5601-1987", PG_JOHAB /* NOTE(review): unsure whether KS_C_5601-1987 or ISO-2022-KR is right */
+ },
+ {
+ "UTF-8", PG_UTF8
+ },
+ {
+ "MULE_INTERNAL", PG_MULE_INTERNAL /* placeholder; presumably not a registered IANA name */
+ },
+ {
+ "ISO-8859-1", PG_LATIN1
+ },
+ {
+ "ISO-8859-2", PG_LATIN2
+ },
+ {
+ "ISO-8859-3", PG_LATIN3
+ },
+ {
+ "ISO-8859-4", PG_LATIN4
+ },
+ {
+ "ISO-8859-9", PG_LATIN5
+ },
+ {
+ "ISO-8859-10", PG_LATIN6
+ },
+ {
+ "ISO-8859-13", PG_LATIN7
+ },
+ {
+ "ISO-8859-14", PG_LATIN8
+ },
+ {
+ "ISO-8859-15", PG_LATIN9
+ },
+ {
+ "ISO-8859-16", PG_LATIN10
+ },
+ {
+ "windows-1256", PG_WIN1256
+ },
+ {
+ "windows-874", PG_WIN874
+ },
+ {
+ "KOI8-R", PG_KOI8R
+ },
+ {
+ "windows-1251", PG_WIN1251
+ },
+ {
+ "ISO-8859-5", PG_ISO_8859_5
+ },
+ {
+ "ISO-8859-6", PG_ISO_8859_6
+ },
+ {
+ "ISO-8859-7", PG_ISO_8859_7
+ },
+ {
+ "ISO-8859-8", PG_ISO_8859_8
+ },
+ {
+ "windows-1250", PG_WIN1250
+ },
+ {
+ "Shift_JIS", PG_SJIS
+ },
+ {
+ "Big5", PG_BIG5
+ },
+ {
+ "GBK", PG_GBK
+ },
+ {
+ "cp949", PG_UHC
+ },
+ {
+ "GB18030", PG_GB18030
+ }
+};
+
/* ----------
* Encoding checks, for error returns -1 else encoding id
* ----------
--- src/backend/utils/mb/mbutils.c.orig Sun May 21 23:05:48 2006
+++ src/backend/utils/mb/mbutils.c Tue Oct 10 22:39:22 2006
@@ -12,6 +12,7 @@
#include "miscadmin.h"
#include "mb/pg_wchar.h"
#include "utils/builtins.h"
+#include "utils/pg_locale.h"
#include "utils/memutils.h"
#include "utils/syscache.h"
#include "catalog/namespace.h"
@@ -611,6 +612,27 @@
DatabaseEncoding = &pg_enc2name_tbl[encoding];
Assert(DatabaseEncoding->encoding == encoding);
+
+ /* try to set charset for messages the same as database charset */
+ if (!bind_textdomain_codeset("postgres",
+ (&pg_enc2iananame_tbl[encoding])->name))
+ {
+ /* when impossible - set LC_MESSAGES to C and lock it there */
+ pg_perm_setlocale(LC_MESSAGES, "C");
+ elog(NOTICE,"cannot set messages encoding for database encoding %s."
+ ,DatabaseEncoding->name);
+ locale_messages_locked=true;
+#ifdef __USE_GNU_GETTEXT
+ /*
+ * Make change known to GNU gettext.
+ * Code taken from GNU gettext info manual.
+ */
+ {
+ extern int _nl_msg_cat_cntr;
+ ++_nl_msg_cat_cntr;
+ }
+#endif
+ }
}
void
--- src/backend/utils/adt/pg_locale.c.orig Tue Oct 10 22:02:08 2006
+++ src/backend/utils/adt/pg_locale.c Tue Oct 10 22:02:39 2006
@@ -61,6 +61,8 @@
char *locale_numeric;
char *locale_time;
+bool locale_messages_locked=false;
+
/* indicates whether locale information cache is valid */
static bool CurrentLocaleConvValid = false;
@@ -240,6 +242,7 @@
* On Windows, we can't even check the value, so the non-doit case
* is a no-op
*/
+
if(locale_messages_locked)
{
elog(ERROR,"cannot change lc_messages for this database");
--- src/include/utils/pg_locale.h.orig Thu Jan 5 02:54:51 2006
+++ src/include/utils/pg_locale.h Tue Oct 10 22:03:22 2006
@@ -22,6 +22,8 @@
extern char *locale_numeric;
extern char *locale_time;
+extern bool locale_messages_locked;
+
extern const char *locale_messages_assign(const char *value,
bool doit, GucSource source);
extern const char *locale_monetary_assign(const char *value,
---------------------------(end of broadcast)---------------------------
TIP 1: if posting/reading through Usenet, please send an appropriate
subscribe-nomail command to [EMAIL PROTECTED] so that your
message can get through to the mailing list cleanly