On Tue, 10 Oct 2006, Sergiy Vyshnevetskiy wrote:

On Tue, 10 Oct 2006, Tom Lane wrote:

 Sergiy Vyshnevetskiy <[EMAIL PROTECTED]> writes:
> >  It's not magic, it's from ICU patch. Want me to send you a copy? :)

 You're missing my point, which is that non-ICU locale support doesn't
 necessarily recognize the same encoding names.  We would have done this
 years ago if we had a solution to that problem.

We should use IANA-standard names. If it fails, it does nothing.
Anybody porting PostgreSQL to a new platform can go over the list and make a patch for their port.

Here is a new and improved patch that closes the security hole as well. To prevent a DoS attack, we lock LC_MESSAGES to C for any database encoding that we are unable to bind to our textdomain.
--- src/backend/utils/mb/encnames.c.orig        Mon Feb 13 00:32:56 2006
+++ src/backend/utils/mb/encnames.c     Tue Oct 10 21:16:59 2006
@@ -375,6 +375,114 @@
        }
 };
 
+/*
+ * Try to map most internal character encodings to the proper and
+ * preferred IANA string. Use this in mbutils.c to feed gettext info about
+ * the database's character encoding.
+ *
+ * Palle Girgensohn, 2005
+ */
+
+pg_enc2name pg_enc2iananame_tbl[] =
+{
+       {
+               "US-ASCII", PG_SQL_ASCII
+       },
+       {
+               "EUC-JP", PG_EUC_JP
+       },
+       {
+               "GB2312", PG_EUC_CN
+       },
+       {
+               "EUC-KR", PG_EUC_KR
+       },
+       {
+               "ISO-2022-CN", PG_EUC_TW
+       },
+       {
+               "KS_C_5601-1987", PG_JOHAB  /* either KS_C_5601-1987 or ISO-2022-KR ??? */
+       },
+       {
+               "UTF-8", PG_UTF8
+       },
+       {
+               "MULE_INTERNAL", PG_MULE_INTERNAL  /* is not for real */
+       },
+       {
+               "ISO-8859-1", PG_LATIN1
+       },
+       {
+               "ISO-8859-2", PG_LATIN2
+       },
+       {
+               "ISO-8859-3", PG_LATIN3
+       },
+       {
+               "ISO-8859-4", PG_LATIN4
+       },
+       {
+               "ISO-8859-9", PG_LATIN5
+       },
+       {
+               "ISO-8859-10", PG_LATIN6
+       },
+       {
+               "ISO-8859-13", PG_LATIN7
+       },
+       {
+               "ISO-8859-14", PG_LATIN8
+       },
+       {
+               "ISO-8859-15", PG_LATIN9
+       },
+       {
+               "ISO-8859-16", PG_LATIN10
+       },
+       {
+               "windows-1256", PG_WIN1256
+       },
+       {
+               "windows-874", PG_WIN874
+       },
+       {
+               "KOI8-R", PG_KOI8R
+       },
+       {
+               "windows-1251", PG_WIN1251
+       },
+       {
+               "ISO-8859-5", PG_ISO_8859_5
+       },
+       {
+               "ISO-8859-6", PG_ISO_8859_6
+       },
+       {
+               "ISO-8859-7", PG_ISO_8859_7
+       },
+       {
+               "ISO-8859-8", PG_ISO_8859_8
+       },
+       {
+               "windows-1250", PG_WIN1250
+       },
+       {
+               "Shift_JIS", PG_SJIS
+       },
+       {
+               "Big5", PG_BIG5
+       },
+       {
+               "GBK", PG_GBK
+       },
+       {
+               "cp949", PG_UHC
+       },
+       {
+               "GB18030", PG_GB18030
+       }
+};
+
 /* ----------
  * Encoding checks, for error returns -1 else encoding id
  * ----------
--- src/backend/utils/mb/mbutils.c.orig Sun May 21 23:05:48 2006
+++ src/backend/utils/mb/mbutils.c      Tue Oct 10 22:39:22 2006
@@ -12,6 +12,7 @@
 #include "miscadmin.h"
 #include "mb/pg_wchar.h"
 #include "utils/builtins.h"
+#include "utils/pg_locale.h"
 #include "utils/memutils.h"
 #include "utils/syscache.h"
 #include "catalog/namespace.h"
@@ -611,6 +612,27 @@
 
        DatabaseEncoding = &pg_enc2name_tbl[encoding];
        Assert(DatabaseEncoding->encoding == encoding);
+       
+       /* try to set charset for messages the same as database charset */
+       if (!bind_textdomain_codeset("postgres",
+               (&pg_enc2iananame_tbl[encoding])->name))
+       {
+               /* when impossible - set LC_MESSAGES to C and lock it there */
+               pg_perm_setlocale(LC_MESSAGES, "C");
+               elog(NOTICE,"cannot set messages encoding for database encoding %s."
+                       ,DatabaseEncoding->name);
+               locale_messages_locked=true;
+#ifdef __USE_GNU_GETTEXT
+               /* 
+                * Make change known to GNU gettext. 
+                * Code taken from GNU gettext info manual.
+                */
+               {
+                       extern int  _nl_msg_cat_cntr;
+                       ++_nl_msg_cat_cntr;
+               }
+#endif
+       }
 }
 
 void
--- src/backend/utils/adt/pg_locale.c.orig      Tue Oct 10 22:02:08 2006
+++ src/backend/utils/adt/pg_locale.c   Tue Oct 10 22:02:39 2006
@@ -61,6 +61,8 @@
 char      *locale_numeric;
 char      *locale_time;
 
+bool      locale_messages_locked=false;
+
 /* indicates whether locale information cache is valid */
 static bool CurrentLocaleConvValid = false;
 
@@ -240,6 +242,7 @@
         * On Windows, we can't even check the value, so the non-doit case
         * is a no-op
         */
+        
        if(locale_messages_locked)
        {
                elog(ERROR,"cannot change lc_messages for this database");
--- src/include/utils/pg_locale.h.orig  Thu Jan  5 02:54:51 2006
+++ src/include/utils/pg_locale.h       Tue Oct 10 22:03:22 2006
@@ -22,6 +22,8 @@
 extern char *locale_numeric;
 extern char *locale_time;
 
+extern bool locale_messages_locked;
+
 extern const char *locale_messages_assign(const char *value,
                                           bool doit, GucSource source);
 extern const char *locale_monetary_assign(const char *value,
---------------------------(end of broadcast)---------------------------
TIP 1: if posting/reading through Usenet, please send an appropriate
       subscribe-nomail command to [EMAIL PROTECTED] so that your
       message can get through to the mailing list cleanly

Reply via email to