On Thu, 29 Feb 2024 at 01:57, Michael Paquier <mich...@paquier.xyz> wrote:
> I have doubts about the changes in raw_pg_bind_textdomain_codeset(),
> as the encoding could come from the value in the pg_database tuples
> themselves. The current coding is slightly safer from the perspective
> of bogus input values as we would loop over pg_enc2gettext_tbl looking
> for a match. 0003 changes that so as we could point to incorrect
> memory areas rather than fail safely for the NULL check.
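That's fair. Attached is v7 of the patch, which adds a PG_VALID_ENCODING() check to raw_pg_bind_textdomain_codeset() before it indexes into pg_enc2gettext_tbl. With that check in place, a bogus encoding value (for example one read from a pg_database tuple) once again fails safely by returning false, instead of reading memory outside the array.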
From a51592bda622746b9f015d7993ca19254bedbc0e Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio <jelte.fenn...@microsoft.com> Date: Tue, 27 Feb 2024 12:26:10 +0100 Subject: [PATCH v7] Simplify pg_enc2gettext_tbl Use designated initialization of pg_enc2gettext_tbl to simplify the implementation of raw_pg_bind_textdomain_codeset. Now iteration over the array is not needed anymore. Instead the desired element can simply be fetched by its index. --- src/backend/utils/mb/mbutils.c | 24 ++++------ src/common/encnames.c | 85 +++++++++++++++++----------------- src/include/mb/pg_wchar.h | 11 ++--- 3 files changed, 55 insertions(+), 65 deletions(-) diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c index c13f947a827..5818e752280 100644 --- a/src/backend/utils/mb/mbutils.c +++ b/src/backend/utils/mb/mbutils.c @@ -1188,24 +1188,20 @@ static bool raw_pg_bind_textdomain_codeset(const char *domainname, int encoding) { bool elog_ok = (CurrentMemoryContext != NULL); - int i; - for (i = 0; pg_enc2gettext_tbl[i].name != NULL; i++) + if (!PG_VALID_ENCODING(encoding) || pg_enc2gettext_tbl[encoding] == NULL) { - if (pg_enc2gettext_tbl[i].encoding == encoding) - { - if (bind_textdomain_codeset(domainname, - pg_enc2gettext_tbl[i].name) != NULL) - return true; + return false; + } - if (elog_ok) - elog(LOG, "bind_textdomain_codeset failed"); - else - write_stderr("bind_textdomain_codeset failed"); + if (bind_textdomain_codeset(domainname, + pg_enc2gettext_tbl[encoding]) != NULL) + return true; - break; - } - } + if (elog_ok) + elog(LOG, "bind_textdomain_codeset failed"); + else + write_stderr("bind_textdomain_codeset failed"); return false; } diff --git a/src/common/encnames.c b/src/common/encnames.c index dba6bd2c9ee..d94c740a28b 100644 --- a/src/common/encnames.c +++ b/src/common/encnames.c @@ -357,50 +357,49 @@ const pg_enc2name pg_enc2name_tbl[] = * This covers all encodings except MULE_INTERNAL, which is alien to gettext. 
* ---------- */ -const pg_enc2gettext pg_enc2gettext_tbl[] = +const char *pg_enc2gettext_tbl[] = { - {PG_SQL_ASCII, "US-ASCII"}, - {PG_UTF8, "UTF-8"}, - {PG_LATIN1, "LATIN1"}, - {PG_LATIN2, "LATIN2"}, - {PG_LATIN3, "LATIN3"}, - {PG_LATIN4, "LATIN4"}, - {PG_ISO_8859_5, "ISO-8859-5"}, - {PG_ISO_8859_6, "ISO_8859-6"}, - {PG_ISO_8859_7, "ISO-8859-7"}, - {PG_ISO_8859_8, "ISO-8859-8"}, - {PG_LATIN5, "LATIN5"}, - {PG_LATIN6, "LATIN6"}, - {PG_LATIN7, "LATIN7"}, - {PG_LATIN8, "LATIN8"}, - {PG_LATIN9, "LATIN-9"}, - {PG_LATIN10, "LATIN10"}, - {PG_KOI8R, "KOI8-R"}, - {PG_KOI8U, "KOI8-U"}, - {PG_WIN1250, "CP1250"}, - {PG_WIN1251, "CP1251"}, - {PG_WIN1252, "CP1252"}, - {PG_WIN1253, "CP1253"}, - {PG_WIN1254, "CP1254"}, - {PG_WIN1255, "CP1255"}, - {PG_WIN1256, "CP1256"}, - {PG_WIN1257, "CP1257"}, - {PG_WIN1258, "CP1258"}, - {PG_WIN866, "CP866"}, - {PG_WIN874, "CP874"}, - {PG_EUC_CN, "EUC-CN"}, - {PG_EUC_JP, "EUC-JP"}, - {PG_EUC_KR, "EUC-KR"}, - {PG_EUC_TW, "EUC-TW"}, - {PG_EUC_JIS_2004, "EUC-JP"}, - {PG_SJIS, "SHIFT-JIS"}, - {PG_BIG5, "BIG5"}, - {PG_GBK, "GBK"}, - {PG_UHC, "UHC"}, - {PG_GB18030, "GB18030"}, - {PG_JOHAB, "JOHAB"}, - {PG_SHIFT_JIS_2004, "SHIFT_JISX0213"}, - {0, NULL} + [PG_SQL_ASCII] = "US-ASCII", + [PG_UTF8] = "UTF-8", + [PG_LATIN1] = "LATIN1", + [PG_LATIN2] = "LATIN2", + [PG_LATIN3] = "LATIN3", + [PG_LATIN4] = "LATIN4", + [PG_ISO_8859_5] = "ISO-8859-5", + [PG_ISO_8859_6] = "ISO_8859-6", + [PG_ISO_8859_7] = "ISO-8859-7", + [PG_ISO_8859_8] = "ISO-8859-8", + [PG_LATIN5] = "LATIN5", + [PG_LATIN6] = "LATIN6", + [PG_LATIN7] = "LATIN7", + [PG_LATIN8] = "LATIN8", + [PG_LATIN9] = "LATIN-9", + [PG_LATIN10] = "LATIN10", + [PG_KOI8R] = "KOI8-R", + [PG_KOI8U] = "KOI8-U", + [PG_WIN1250] = "CP1250", + [PG_WIN1251] = "CP1251", + [PG_WIN1252] = "CP1252", + [PG_WIN1253] = "CP1253", + [PG_WIN1254] = "CP1254", + [PG_WIN1255] = "CP1255", + [PG_WIN1256] = "CP1256", + [PG_WIN1257] = "CP1257", + [PG_WIN1258] = "CP1258", + [PG_WIN866] = "CP866", + [PG_WIN874] = "CP874", + [PG_EUC_CN] = 
"EUC-CN", + [PG_EUC_JP] = "EUC-JP", + [PG_EUC_KR] = "EUC-KR", + [PG_EUC_TW] = "EUC-TW", + [PG_EUC_JIS_2004] = "EUC-JP", + [PG_SJIS] = "SHIFT-JIS", + [PG_BIG5] = "BIG5", + [PG_GBK] = "GBK", + [PG_UHC] = "UHC", + [PG_GB18030] = "GB18030", + [PG_JOHAB] = "JOHAB", + [PG_SHIFT_JIS_2004] = "SHIFT_JISX0213", }; diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h index fd91aefbcb7..32e25a1a6ea 100644 --- a/src/include/mb/pg_wchar.h +++ b/src/include/mb/pg_wchar.h @@ -225,7 +225,8 @@ typedef unsigned int pg_wchar; * PostgreSQL encoding identifiers * * WARNING: If you add some encoding don't forget to update - * the pg_enc2name_tbl[] array (in src/common/encnames.c) and + * the pg_enc2name_tbl[] array (in src/common/encnames.c), + * the pg_enc2gettext_tbl[] array (in src/common/encnames.c) and * the pg_wchar_table[] array (in src/common/wchar.c) and to check * PG_ENCODING_BE_LAST macro. * @@ -365,13 +366,7 @@ extern PGDLLIMPORT const pg_enc2name pg_enc2name_tbl[]; /* * Encoding names for gettext */ -typedef struct pg_enc2gettext -{ - pg_enc encoding; - const char *name; -} pg_enc2gettext; - -extern PGDLLIMPORT const pg_enc2gettext pg_enc2gettext_tbl[]; +extern PGDLLIMPORT const char *pg_enc2gettext_tbl[]; /* * pg_wchar stuff base-commit: ada87a4d95fc39dfb1214edf6653390314b6f0df -- 2.34.1