Re: Improve readability by using designated initializers when possible

Jelte Fennema-Nio Wed, 28 Feb 2024 19:02:13 -0800

On Thu, 29 Feb 2024 at 01:57, Michael Paquier <[email protected]> wrote:
> I have doubts about the changes in raw_pg_bind_textdomain_codeset(),
> as the encoding could come from the value in the pg_database tuples
> themselves.  The current coding is slightly safer from the perspective
> of bogus input values as we would loop over pg_enc2gettext_tbl looking
> for a match.  0003 changes that so as we could point to incorrect
> memory areas rather than fail safely for the NULL check.


That's fair. Attached is a patch that adds a PG_VALID_ENCODING check
to raw_pg_bind_textdomain_codeset to solve this regression.

From a51592bda622746b9f015d7993ca19254bedbc0e Mon Sep 17 00:00:00 2001
From: Jelte Fennema-Nio <[email protected]>
Date: Tue, 27 Feb 2024 12:26:10 +0100
Subject: [PATCH v7] Simplify pg_enc2gettext_tbl

Use designated initializers for pg_enc2gettext_tbl so that the array can
be indexed directly by encoding. This simplifies the implementation of
raw_pg_bind_textdomain_codeset: iterating over the array is no longer
needed. Instead, the encoding is first checked with PG_VALID_ENCODING
and the desired element is then simply fetched by its index.
---
 src/backend/utils/mb/mbutils.c | 24 ++++------
 src/common/encnames.c          | 85 +++++++++++++++++-----------------
 src/include/mb/pg_wchar.h      | 11 ++---
 3 files changed, 55 insertions(+), 65 deletions(-)

diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c
index c13f947a827..5818e752280 100644
--- a/src/backend/utils/mb/mbutils.c
+++ b/src/backend/utils/mb/mbutils.c
@@ -1188,24 +1188,20 @@ static bool
 raw_pg_bind_textdomain_codeset(const char *domainname, int encoding)
 {
 	bool		elog_ok = (CurrentMemoryContext != NULL);
-	int			i;
 
-	for (i = 0; pg_enc2gettext_tbl[i].name != NULL; i++)
+	if (!PG_VALID_ENCODING(encoding) || pg_enc2gettext_tbl[encoding] == NULL)
 	{
-		if (pg_enc2gettext_tbl[i].encoding == encoding)
-		{
-			if (bind_textdomain_codeset(domainname,
-										pg_enc2gettext_tbl[i].name) != NULL)
-				return true;
+		return false;
+	}
 
-			if (elog_ok)
-				elog(LOG, "bind_textdomain_codeset failed");
-			else
-				write_stderr("bind_textdomain_codeset failed");
+	if (bind_textdomain_codeset(domainname,
+								pg_enc2gettext_tbl[encoding]) != NULL)
+		return true;
 
-			break;
-		}
-	}
+	if (elog_ok)
+		elog(LOG, "bind_textdomain_codeset failed");
+	else
+		write_stderr("bind_textdomain_codeset failed");
 
 	return false;
 }
diff --git a/src/common/encnames.c b/src/common/encnames.c
index dba6bd2c9ee..d94c740a28b 100644
--- a/src/common/encnames.c
+++ b/src/common/encnames.c
@@ -357,50 +357,49 @@ const pg_enc2name pg_enc2name_tbl[] =
  * This covers all encodings except MULE_INTERNAL, which is alien to gettext.
  * ----------
  */
-const pg_enc2gettext pg_enc2gettext_tbl[] =
+const char *pg_enc2gettext_tbl[] =
 {
-	{PG_SQL_ASCII, "US-ASCII"},
-	{PG_UTF8, "UTF-8"},
-	{PG_LATIN1, "LATIN1"},
-	{PG_LATIN2, "LATIN2"},
-	{PG_LATIN3, "LATIN3"},
-	{PG_LATIN4, "LATIN4"},
-	{PG_ISO_8859_5, "ISO-8859-5"},
-	{PG_ISO_8859_6, "ISO_8859-6"},
-	{PG_ISO_8859_7, "ISO-8859-7"},
-	{PG_ISO_8859_8, "ISO-8859-8"},
-	{PG_LATIN5, "LATIN5"},
-	{PG_LATIN6, "LATIN6"},
-	{PG_LATIN7, "LATIN7"},
-	{PG_LATIN8, "LATIN8"},
-	{PG_LATIN9, "LATIN-9"},
-	{PG_LATIN10, "LATIN10"},
-	{PG_KOI8R, "KOI8-R"},
-	{PG_KOI8U, "KOI8-U"},
-	{PG_WIN1250, "CP1250"},
-	{PG_WIN1251, "CP1251"},
-	{PG_WIN1252, "CP1252"},
-	{PG_WIN1253, "CP1253"},
-	{PG_WIN1254, "CP1254"},
-	{PG_WIN1255, "CP1255"},
-	{PG_WIN1256, "CP1256"},
-	{PG_WIN1257, "CP1257"},
-	{PG_WIN1258, "CP1258"},
-	{PG_WIN866, "CP866"},
-	{PG_WIN874, "CP874"},
-	{PG_EUC_CN, "EUC-CN"},
-	{PG_EUC_JP, "EUC-JP"},
-	{PG_EUC_KR, "EUC-KR"},
-	{PG_EUC_TW, "EUC-TW"},
-	{PG_EUC_JIS_2004, "EUC-JP"},
-	{PG_SJIS, "SHIFT-JIS"},
-	{PG_BIG5, "BIG5"},
-	{PG_GBK, "GBK"},
-	{PG_UHC, "UHC"},
-	{PG_GB18030, "GB18030"},
-	{PG_JOHAB, "JOHAB"},
-	{PG_SHIFT_JIS_2004, "SHIFT_JISX0213"},
-	{0, NULL}
+	[PG_SQL_ASCII] = "US-ASCII",
+	[PG_UTF8] = "UTF-8",
+	[PG_LATIN1] = "LATIN1",
+	[PG_LATIN2] = "LATIN2",
+	[PG_LATIN3] = "LATIN3",
+	[PG_LATIN4] = "LATIN4",
+	[PG_ISO_8859_5] = "ISO-8859-5",
+	[PG_ISO_8859_6] = "ISO_8859-6",
+	[PG_ISO_8859_7] = "ISO-8859-7",
+	[PG_ISO_8859_8] = "ISO-8859-8",
+	[PG_LATIN5] = "LATIN5",
+	[PG_LATIN6] = "LATIN6",
+	[PG_LATIN7] = "LATIN7",
+	[PG_LATIN8] = "LATIN8",
+	[PG_LATIN9] = "LATIN-9",
+	[PG_LATIN10] = "LATIN10",
+	[PG_KOI8R] = "KOI8-R",
+	[PG_KOI8U] = "KOI8-U",
+	[PG_WIN1250] = "CP1250",
+	[PG_WIN1251] = "CP1251",
+	[PG_WIN1252] = "CP1252",
+	[PG_WIN1253] = "CP1253",
+	[PG_WIN1254] = "CP1254",
+	[PG_WIN1255] = "CP1255",
+	[PG_WIN1256] = "CP1256",
+	[PG_WIN1257] = "CP1257",
+	[PG_WIN1258] = "CP1258",
+	[PG_WIN866] = "CP866",
+	[PG_WIN874] = "CP874",
+	[PG_EUC_CN] = "EUC-CN",
+	[PG_EUC_JP] = "EUC-JP",
+	[PG_EUC_KR] = "EUC-KR",
+	[PG_EUC_TW] = "EUC-TW",
+	[PG_EUC_JIS_2004] = "EUC-JP",
+	[PG_SJIS] = "SHIFT-JIS",
+	[PG_BIG5] = "BIG5",
+	[PG_GBK] = "GBK",
+	[PG_UHC] = "UHC",
+	[PG_GB18030] = "GB18030",
+	[PG_JOHAB] = "JOHAB",
+	[PG_SHIFT_JIS_2004] = "SHIFT_JISX0213",
 };
 
 
diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h
index fd91aefbcb7..32e25a1a6ea 100644
--- a/src/include/mb/pg_wchar.h
+++ b/src/include/mb/pg_wchar.h
@@ -225,7 +225,8 @@ typedef unsigned int pg_wchar;
  * PostgreSQL encoding identifiers
  *
  * WARNING: If you add some encoding don't forget to update
- *			the pg_enc2name_tbl[] array (in src/common/encnames.c) and
+ *			the pg_enc2name_tbl[] array (in src/common/encnames.c),
+ *			the pg_enc2gettext_tbl[] array (in src/common/encnames.c) and
  *			the pg_wchar_table[] array (in src/common/wchar.c) and to check
  *			PG_ENCODING_BE_LAST macro.
  *
@@ -365,13 +366,7 @@ extern PGDLLIMPORT const pg_enc2name pg_enc2name_tbl[];
 /*
  * Encoding names for gettext
  */
-typedef struct pg_enc2gettext
-{
-	pg_enc		encoding;
-	const char *name;
-} pg_enc2gettext;
-
-extern PGDLLIMPORT const pg_enc2gettext pg_enc2gettext_tbl[];
+extern PGDLLIMPORT const char *pg_enc2gettext_tbl[];
 
 /*
  * pg_wchar stuff

base-commit: ada87a4d95fc39dfb1214edf6653390314b6f0df
-- 
2.34.1

Re: Improve readability by using designated initializers when possible

Reply via email to