On Mon, 2024-07-29 at 21:45 +0200, Peter Eisentraut wrote:
> I have also re-reviewed the patches and I agree they are good to go.

I found a couple of issues with the later patches:

* There was still some confusion about the default collation vs.
datcollate/datctype for callers of wchar2char() and char2wchar() (those
functions only work with the libc provider). To solve this, I
introduced a new pg_locale_t, returned by get_db_env_locale(), that
always represents datcollate/datctype using libc, regardless of
datlocprovider.

* Another loose end relying on setlocale(): in selfuncs.c, there's
still a direct call to strxfrm(), which depends on setlocale(). I
changed this to look up the collation and then use pg_strxfrm(). That
should improve histogram selectivity estimates because it uses the
correct provider, rather than relying on setlocale(), right?

New series attached.

Regards,
        Jeff Davis

From 5b903c82f34f5da9cab58ecd0a2683454d6ac9ed Mon Sep 17 00:00:00 2001
From: Jeff Davis <j...@j-davis.com>
Date: Wed, 5 Jun 2024 14:48:07 -0700
Subject: [PATCH v6 1/3] Make datcollate/datctype accessible as a pg_locale_t.

get_db_env_locale() returns a libc locale representing the LC_COLLATE
/ LC_CTYPE environment, which is the same as the database default
collation if and only if the datlocprovider is libc.

Update callers in ts_locale.c and wparser_def.c to use
get_db_env_locale() instead of NULL, and replace the
database_ctype_is_c global with the locale's ctype_is_c field.

Discussion: https://postgr.es/m/cfd9eb85-c52a-4ec9-a90e-a5e4de56e...@eisentraut.org
Reviewed-by: Peter Eisentraut, Andreas Karlsson
---
 src/backend/tsearch/ts_locale.c   | 37 ++++++++++++++++---------
 src/backend/tsearch/wparser_def.c |  6 +++--
 src/backend/utils/adt/pg_locale.c | 45 ++++++++++++++++++++++++++++---
 src/backend/utils/init/postinit.c |  5 +---
 src/include/utils/pg_locale.h     |  5 ++--
 5 files changed, 74 insertions(+), 24 deletions(-)

diff --git a/src/backend/tsearch/ts_locale.c b/src/backend/tsearch/ts_locale.c
index bc44599de6a..6befd8e82d6 100644
--- a/src/backend/tsearch/ts_locale.c
+++ b/src/backend/tsearch/ts_locale.c
@@ -13,6 +13,7 @@
  */
 #include "postgres.h"
 
+#include "catalog/pg_collation.h"
 #include "common/string.h"
 #include "storage/fd.h"
 #include "tsearch/ts_locale.h"
@@ -36,9 +37,11 @@ t_isdigit(const char *ptr)
 {
 	int			clen = pg_mblen(ptr);
 	wchar_t		character[WC_BUF_LEN];
-	pg_locale_t mylocale = 0;	/* TODO */
 
-	if (clen == 1 || database_ctype_is_c)
+	/* TODO: determine collation properly */
+	pg_locale_t mylocale = get_db_env_locale();
+
+	if (clen == 1 || mylocale->ctype_is_c)
 		return isdigit(TOUCHAR(ptr));
 
 	char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
@@ -51,9 +54,11 @@ t_isspace(const char *ptr)
 {
 	int			clen = pg_mblen(ptr);
 	wchar_t		character[WC_BUF_LEN];
-	pg_locale_t mylocale = 0;	/* TODO */
 
-	if (clen == 1 || database_ctype_is_c)
+	/* TODO: determine collation properly */
+	pg_locale_t mylocale = get_db_env_locale();
+
+	if (clen == 1 || mylocale->ctype_is_c)
 		return isspace(TOUCHAR(ptr));
 
 	char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
@@ -66,9 +71,11 @@ t_isalpha(const char *ptr)
 {
 	int			clen = pg_mblen(ptr);
 	wchar_t		character[WC_BUF_LEN];
-	pg_locale_t mylocale = 0;	/* TODO */
 
-	if (clen == 1 || database_ctype_is_c)
+	/* TODO: determine collation properly */
+	pg_locale_t mylocale = get_db_env_locale();
+
+	if (clen == 1 || mylocale->ctype_is_c)
 		return isalpha(TOUCHAR(ptr));
 
 	char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
@@ -81,9 +88,11 @@ t_isalnum(const char *ptr)
 {
 	int			clen = pg_mblen(ptr);
 	wchar_t		character[WC_BUF_LEN];
-	pg_locale_t mylocale = 0;	/* TODO */
 
-	if (clen == 1 || database_ctype_is_c)
+	/* TODO: determine collation properly */
+	pg_locale_t mylocale = get_db_env_locale();
+
+	if (clen == 1 || mylocale->ctype_is_c)
 		return isalnum(TOUCHAR(ptr));
 
 	char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
@@ -96,9 +105,11 @@ t_isprint(const char *ptr)
 {
 	int			clen = pg_mblen(ptr);
 	wchar_t		character[WC_BUF_LEN];
-	pg_locale_t mylocale = 0;	/* TODO */
 
-	if (clen == 1 || database_ctype_is_c)
+	/* TODO: determine collation properly */
+	pg_locale_t mylocale = get_db_env_locale();
+
+	if (clen == 1 || mylocale->ctype_is_c)
 		return isprint(TOUCHAR(ptr));
 
 	char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
@@ -266,7 +277,9 @@ char *
 lowerstr_with_len(const char *str, int len)
 {
 	char	   *out;
-	pg_locale_t mylocale = 0;	/* TODO */
+
+	/* TODO: determine collation properly */
+	pg_locale_t mylocale = get_db_env_locale();
 
 	if (len == 0)
 		return pstrdup("");
@@ -277,7 +290,7 @@ lowerstr_with_len(const char *str, int len)
 	 * Also, for a C locale there is no need to process as multibyte. From
 	 * backend/utils/adt/oracle_compat.c Teodor
 	 */
-	if (pg_database_encoding_max_length() > 1 && !database_ctype_is_c)
+	if (pg_database_encoding_max_length() > 1 && !mylocale->ctype_is_c)
 	{
 		wchar_t    *wstr,
 				   *wptr;
diff --git a/src/backend/tsearch/wparser_def.c b/src/backend/tsearch/wparser_def.c
index 3919ef27b57..45caec0c4f0 100644
--- a/src/backend/tsearch/wparser_def.c
+++ b/src/backend/tsearch/wparser_def.c
@@ -17,6 +17,7 @@
 #include <limits.h>
 #include <wctype.h>
 
+#include "catalog/pg_collation.h"
 #include "commands/defrem.h"
 #include "mb/pg_wchar.h"
 #include "miscadmin.h"
@@ -299,10 +300,11 @@ TParserInit(char *str, int len)
 	 */
 	if (prs->charmaxlen > 1)
 	{
-		pg_locale_t mylocale = 0;	/* TODO */
+		/* TODO: determine collation properly */
+		pg_locale_t mylocale = get_db_env_locale();
 
 		prs->usewide = true;
-		if (database_ctype_is_c)
+		if (mylocale->ctype_is_c)
 		{
 			/*
 			 * char2wchar doesn't work for C-locale and sizeof(pg_wchar) could
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 627ab89d7cc..0295d834cc5 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -114,10 +114,8 @@ char	   *localized_full_days[7 + 1];
 char	   *localized_abbrev_months[12 + 1];
 char	   *localized_full_months[12 + 1];
 
-/* is the databases's LC_CTYPE the C locale? */
-bool		database_ctype_is_c = false;
-
 static struct pg_locale_struct default_locale;
+static struct pg_locale_struct database_env_locale;
 
 /* indicates whether locale information cache is valid */
 static bool CurrentLocaleConvValid = false;
@@ -1471,6 +1469,42 @@ pg_locale_deterministic(pg_locale_t locale)
 		return locale->deterministic;
 }
 
+/*
+ * Initialize the database environment locale and store in a pg_locale_t.
+ */
+void
+init_db_env_locale(const char *datcollate, const char *datctype)
+{
+	Assert(database_env_locale.provider == (char) 0);
+
+	database_env_locale.provider = COLLPROVIDER_LIBC;
+	database_env_locale.deterministic = true;
+	database_env_locale.collate_is_c = (strcmp(datcollate, "C") == 0) ||
+		(strcmp(datcollate, "POSIX") == 0);
+	database_env_locale.ctype_is_c = (strcmp(datctype, "C") == 0) ||
+		(strcmp(datctype, "POSIX") == 0);
+
+	make_libc_collator(datcollate, datctype, &database_env_locale);
+}
+
+/*
+ * Return pg_locale_t representing the database environment locale.
+ *
+ * The provider is always libc, and it represents the server environment
+ * LC_COLLATE and LC_CTYPE.
+ *
+ * Most callers should use pg_newlocale_from_collation(DEFAULT_COLLATION_OID)
+ * instead to get a pg_locale_t representing the database default collation
+ * (which might be any provider). Use get_db_env_locale() only if the libc
+ * provider is needed, such as with wchar2char()/char2wchar().
+ */
+pg_locale_t
+get_db_env_locale(void)
+{
+	Assert(database_env_locale.provider != (char) 0);
+	return &database_env_locale;
+}
+
 /*
  * Initialize default_locale with database locale settings.
  */
@@ -1482,6 +1516,8 @@ init_database_collation(void)
 	Datum		datum;
 	bool		isnull;
 
+	Assert(default_locale.provider == (char) 0);
+
 	/* Fetch our pg_database row normally, via syscache */
 	tup = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
 	if (!HeapTupleIsValid(tup))
@@ -1571,7 +1607,10 @@ pg_newlocale_from_collation(Oid collid)
 	Assert(OidIsValid(collid));
 
 	if (collid == DEFAULT_COLLATION_OID)
+	{
+		Assert(default_locale.provider != (char) 0);
 		return &default_locale;
+	}
 
 	cache_entry = lookup_collation_cache(collid);
 
diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c
index 13524ea488a..23ac403e390 100644
--- a/src/backend/utils/init/postinit.c
+++ b/src/backend/utils/init/postinit.c
@@ -418,10 +418,7 @@ CheckMyDatabase(const char *name, bool am_superuser, bool override_allow_connect
 						   " which is not recognized by setlocale().", ctype),
 				 errhint("Recreate the database with another locale or install the missing locale.")));
 
-	if (strcmp(ctype, "C") == 0 ||
-		strcmp(ctype, "POSIX") == 0)
-		database_ctype_is_c = true;
-
+	init_db_env_locale(collate, ctype);
 	init_database_collation();
 
 	/*
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index f41d33975be..47b2942c9d8 100644
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -48,9 +48,6 @@ extern PGDLLIMPORT char *localized_full_days[];
 extern PGDLLIMPORT char *localized_abbrev_months[];
 extern PGDLLIMPORT char *localized_full_months[];
 
-/* is the databases's LC_CTYPE the C locale? */
-extern PGDLLIMPORT bool database_ctype_is_c;
-
 extern bool check_locale(int category, const char *locale, char **canonname);
 extern char *pg_perm_setlocale(int category, const char *locale);
 
@@ -112,6 +109,8 @@ extern void make_icu_collator(const char *iculocstr,
 							  struct pg_locale_struct *resultp);
 
 extern bool pg_locale_deterministic(pg_locale_t locale);
+extern void init_db_env_locale(const char *datcollate, const char *datctype);
+extern pg_locale_t get_db_env_locale(void);
 extern void init_database_collation(void);
 extern pg_locale_t pg_newlocale_from_collation(Oid collid);
 
-- 
2.34.1

From 1eea055318b07155fe025d9f6cf56dadcea040a0 Mon Sep 17 00:00:00 2001
From: Jeff Davis <j...@j-davis.com>
Date: Wed, 5 Jun 2024 11:58:59 -0700
Subject: [PATCH v6 2/3] Remove support for null pg_locale_t.

Previously, passing NULL for pg_locale_t meant "use the libc provider
and the server environment". Now that the database collation is
represented as a proper pg_locale_t (not dependent on setlocale()),
remove special cases for NULL.

Discussion: https://postgr.es/m/cfd9eb85-c52a-4ec9-a90e-a5e4de56e...@eisentraut.org
Reviewed-by: Peter Eisentraut, Andreas Karlsson
---
 src/backend/access/hash/hashfunc.c |  10 +--
 src/backend/regex/regc_pg_locale.c | 113 +----------------------------
 src/backend/utils/adt/formatting.c |  84 ++++++---------------
 src/backend/utils/adt/like.c       |  10 +--
 src/backend/utils/adt/pg_locale.c  |  78 ++++++--------------
 src/backend/utils/adt/varchar.c    |  10 +--
 src/backend/utils/adt/varlena.c    |  28 +++----
 7 files changed, 69 insertions(+), 264 deletions(-)

diff --git a/src/backend/access/hash/hashfunc.c b/src/backend/access/hash/hashfunc.c
index ce8ee0ea2ef..d151751e185 100644
--- a/src/backend/access/hash/hashfunc.c
+++ b/src/backend/access/hash/hashfunc.c
@@ -268,7 +268,7 @@ hashtext(PG_FUNCTION_ARGS)
 {
 	text	   *key = PG_GETARG_TEXT_PP(0);
 	Oid			collid = PG_GET_COLLATION();
-	pg_locale_t mylocale = 0;
+	pg_locale_t mylocale;
 	Datum		result;
 
 	if (!collid)
@@ -277,8 +277,7 @@ hashtext(PG_FUNCTION_ARGS)
 				 errmsg("could not determine which collation to use for string hashing"),
 				 errhint("Use the COLLATE clause to set the collation explicitly.")));
 
-	if (!lc_collate_is_c(collid))
-		mylocale = pg_newlocale_from_collation(collid);
+	mylocale = pg_newlocale_from_collation(collid);
 
 	if (pg_locale_deterministic(mylocale))
 	{
@@ -322,7 +321,7 @@ hashtextextended(PG_FUNCTION_ARGS)
 {
 	text	   *key = PG_GETARG_TEXT_PP(0);
 	Oid			collid = PG_GET_COLLATION();
-	pg_locale_t mylocale = 0;
+	pg_locale_t mylocale;
 	Datum		result;
 
 	if (!collid)
@@ -331,8 +330,7 @@ hashtextextended(PG_FUNCTION_ARGS)
 				 errmsg("could not determine which collation to use for string hashing"),
 				 errhint("Use the COLLATE clause to set the collation explicitly.")));
 
-	if (!lc_collate_is_c(collid))
-		mylocale = pg_newlocale_from_collation(collid);
+	mylocale = pg_newlocale_from_collation(collid);
 
 	if (pg_locale_deterministic(mylocale))
 	{
diff --git a/src/backend/regex/regc_pg_locale.c b/src/backend/regex/regc_pg_locale.c
index 9d98d10a285..947d73f3e0f 100644
--- a/src/backend/regex/regc_pg_locale.c
+++ b/src/backend/regex/regc_pg_locale.c
@@ -67,8 +67,6 @@ typedef enum
 {
 	PG_REGEX_LOCALE_C,			/* C locale (encoding independent) */
 	PG_REGEX_BUILTIN,			/* built-in Unicode semantics */
-	PG_REGEX_LOCALE_WIDE,		/* Use <wctype.h> functions */
-	PG_REGEX_LOCALE_1BYTE,		/* Use <ctype.h> functions */
 	PG_REGEX_LOCALE_WIDE_L,		/* Use locale_t <wctype.h> functions */
 	PG_REGEX_LOCALE_1BYTE_L,	/* Use locale_t <ctype.h> functions */
 	PG_REGEX_LOCALE_ICU,		/* Use ICU uchar.h functions */
@@ -261,13 +259,13 @@ pg_set_regex_collation(Oid collation)
 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 					 errmsg("nondeterministic collations are not supported for regular expressions")));
 
-		if (pg_regex_locale && pg_regex_locale->provider == COLLPROVIDER_BUILTIN)
+		if (pg_regex_locale->provider == COLLPROVIDER_BUILTIN)
 		{
 			Assert(GetDatabaseEncoding() == PG_UTF8);
 			pg_regex_strategy = PG_REGEX_BUILTIN;
 		}
 #ifdef USE_ICU
-		else if (pg_regex_locale && pg_regex_locale->provider == COLLPROVIDER_ICU)
+		else if (pg_regex_locale->provider == COLLPROVIDER_ICU)
 		{
 			pg_regex_strategy = PG_REGEX_LOCALE_ICU;
 		}
@@ -275,19 +273,9 @@ pg_set_regex_collation(Oid collation)
 		else
 		{
 			if (GetDatabaseEncoding() == PG_UTF8)
-			{
-				if (pg_regex_locale)
-					pg_regex_strategy = PG_REGEX_LOCALE_WIDE_L;
-				else
-					pg_regex_strategy = PG_REGEX_LOCALE_WIDE;
-			}
+				pg_regex_strategy = PG_REGEX_LOCALE_WIDE_L;
 			else
-			{
-				if (pg_regex_locale)
-					pg_regex_strategy = PG_REGEX_LOCALE_1BYTE_L;
-				else
-					pg_regex_strategy = PG_REGEX_LOCALE_1BYTE;
-			}
+				pg_regex_strategy = PG_REGEX_LOCALE_1BYTE_L;
 		}
 
 		pg_regex_collation = collation;
@@ -304,13 +292,6 @@ pg_wc_isdigit(pg_wchar c)
 					(pg_char_properties[c] & PG_ISDIGIT));
 		case PG_REGEX_BUILTIN:
 			return pg_u_isdigit(c, true);
-		case PG_REGEX_LOCALE_WIDE:
-			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return iswdigit((wint_t) c);
-			/* FALL THRU */
-		case PG_REGEX_LOCALE_1BYTE:
-			return (c <= (pg_wchar) UCHAR_MAX &&
-					isdigit((unsigned char) c));
 		case PG_REGEX_LOCALE_WIDE_L:
 			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
 				return iswdigit_l((wint_t) c, pg_regex_locale->info.lt);
@@ -338,13 +319,6 @@ pg_wc_isalpha(pg_wchar c)
 					(pg_char_properties[c] & PG_ISALPHA));
 		case PG_REGEX_BUILTIN:
 			return pg_u_isalpha(c);
-		case PG_REGEX_LOCALE_WIDE:
-			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return iswalpha((wint_t) c);
-			/* FALL THRU */
-		case PG_REGEX_LOCALE_1BYTE:
-			return (c <= (pg_wchar) UCHAR_MAX &&
-					isalpha((unsigned char) c));
 		case PG_REGEX_LOCALE_WIDE_L:
 			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
 				return iswalpha_l((wint_t) c, pg_regex_locale->info.lt);
@@ -372,13 +346,6 @@ pg_wc_isalnum(pg_wchar c)
 					(pg_char_properties[c] & PG_ISALNUM));
 		case PG_REGEX_BUILTIN:
 			return pg_u_isalnum(c, true);
-		case PG_REGEX_LOCALE_WIDE:
-			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return iswalnum((wint_t) c);
-			/* FALL THRU */
-		case PG_REGEX_LOCALE_1BYTE:
-			return (c <= (pg_wchar) UCHAR_MAX &&
-					isalnum((unsigned char) c));
 		case PG_REGEX_LOCALE_WIDE_L:
 			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
 				return iswalnum_l((wint_t) c, pg_regex_locale->info.lt);
@@ -415,13 +382,6 @@ pg_wc_isupper(pg_wchar c)
 					(pg_char_properties[c] & PG_ISUPPER));
 		case PG_REGEX_BUILTIN:
 			return pg_u_isupper(c);
-		case PG_REGEX_LOCALE_WIDE:
-			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return iswupper((wint_t) c);
-			/* FALL THRU */
-		case PG_REGEX_LOCALE_1BYTE:
-			return (c <= (pg_wchar) UCHAR_MAX &&
-					isupper((unsigned char) c));
 		case PG_REGEX_LOCALE_WIDE_L:
 			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
 				return iswupper_l((wint_t) c, pg_regex_locale->info.lt);
@@ -449,13 +409,6 @@ pg_wc_islower(pg_wchar c)
 					(pg_char_properties[c] & PG_ISLOWER));
 		case PG_REGEX_BUILTIN:
 			return pg_u_islower(c);
-		case PG_REGEX_LOCALE_WIDE:
-			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return iswlower((wint_t) c);
-			/* FALL THRU */
-		case PG_REGEX_LOCALE_1BYTE:
-			return (c <= (pg_wchar) UCHAR_MAX &&
-					islower((unsigned char) c));
 		case PG_REGEX_LOCALE_WIDE_L:
 			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
 				return iswlower_l((wint_t) c, pg_regex_locale->info.lt);
@@ -483,13 +436,6 @@ pg_wc_isgraph(pg_wchar c)
 					(pg_char_properties[c] & PG_ISGRAPH));
 		case PG_REGEX_BUILTIN:
 			return pg_u_isgraph(c);
-		case PG_REGEX_LOCALE_WIDE:
-			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return iswgraph((wint_t) c);
-			/* FALL THRU */
-		case PG_REGEX_LOCALE_1BYTE:
-			return (c <= (pg_wchar) UCHAR_MAX &&
-					isgraph((unsigned char) c));
 		case PG_REGEX_LOCALE_WIDE_L:
 			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
 				return iswgraph_l((wint_t) c, pg_regex_locale->info.lt);
@@ -517,13 +463,6 @@ pg_wc_isprint(pg_wchar c)
 					(pg_char_properties[c] & PG_ISPRINT));
 		case PG_REGEX_BUILTIN:
 			return pg_u_isprint(c);
-		case PG_REGEX_LOCALE_WIDE:
-			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return iswprint((wint_t) c);
-			/* FALL THRU */
-		case PG_REGEX_LOCALE_1BYTE:
-			return (c <= (pg_wchar) UCHAR_MAX &&
-					isprint((unsigned char) c));
 		case PG_REGEX_LOCALE_WIDE_L:
 			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
 				return iswprint_l((wint_t) c, pg_regex_locale->info.lt);
@@ -551,13 +490,6 @@ pg_wc_ispunct(pg_wchar c)
 					(pg_char_properties[c] & PG_ISPUNCT));
 		case PG_REGEX_BUILTIN:
 			return pg_u_ispunct(c, true);
-		case PG_REGEX_LOCALE_WIDE:
-			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return iswpunct((wint_t) c);
-			/* FALL THRU */
-		case PG_REGEX_LOCALE_1BYTE:
-			return (c <= (pg_wchar) UCHAR_MAX &&
-					ispunct((unsigned char) c));
 		case PG_REGEX_LOCALE_WIDE_L:
 			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
 				return iswpunct_l((wint_t) c, pg_regex_locale->info.lt);
@@ -585,13 +517,6 @@ pg_wc_isspace(pg_wchar c)
 					(pg_char_properties[c] & PG_ISSPACE));
 		case PG_REGEX_BUILTIN:
 			return pg_u_isspace(c);
-		case PG_REGEX_LOCALE_WIDE:
-			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return iswspace((wint_t) c);
-			/* FALL THRU */
-		case PG_REGEX_LOCALE_1BYTE:
-			return (c <= (pg_wchar) UCHAR_MAX &&
-					isspace((unsigned char) c));
 		case PG_REGEX_LOCALE_WIDE_L:
 			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
 				return iswspace_l((wint_t) c, pg_regex_locale->info.lt);
@@ -620,20 +545,6 @@ pg_wc_toupper(pg_wchar c)
 			return c;
 		case PG_REGEX_BUILTIN:
 			return unicode_uppercase_simple(c);
-		case PG_REGEX_LOCALE_WIDE:
-			/* force C behavior for ASCII characters, per comments above */
-			if (c <= (pg_wchar) 127)
-				return pg_ascii_toupper((unsigned char) c);
-			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return towupper((wint_t) c);
-			/* FALL THRU */
-		case PG_REGEX_LOCALE_1BYTE:
-			/* force C behavior for ASCII characters, per comments above */
-			if (c <= (pg_wchar) 127)
-				return pg_ascii_toupper((unsigned char) c);
-			if (c <= (pg_wchar) UCHAR_MAX)
-				return toupper((unsigned char) c);
-			return c;
 		case PG_REGEX_LOCALE_WIDE_L:
 			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
 				return towupper_l((wint_t) c, pg_regex_locale->info.lt);
@@ -662,20 +573,6 @@ pg_wc_tolower(pg_wchar c)
 			return c;
 		case PG_REGEX_BUILTIN:
 			return unicode_lowercase_simple(c);
-		case PG_REGEX_LOCALE_WIDE:
-			/* force C behavior for ASCII characters, per comments above */
-			if (c <= (pg_wchar) 127)
-				return pg_ascii_tolower((unsigned char) c);
-			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return towlower((wint_t) c);
-			/* FALL THRU */
-		case PG_REGEX_LOCALE_1BYTE:
-			/* force C behavior for ASCII characters, per comments above */
-			if (c <= (pg_wchar) 127)
-				return pg_ascii_tolower((unsigned char) c);
-			if (c <= (pg_wchar) UCHAR_MAX)
-				return tolower((unsigned char) c);
-			return c;
 		case PG_REGEX_LOCALE_WIDE_L:
 			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
 				return towlower_l((wint_t) c, pg_regex_locale->info.lt);
@@ -829,11 +726,9 @@ pg_ctype_get_cache(pg_wc_probefunc probefunc, int cclasscode)
 		case PG_REGEX_BUILTIN:
 			max_chr = (pg_wchar) MAX_SIMPLE_CHR;
 			break;
-		case PG_REGEX_LOCALE_WIDE:
 		case PG_REGEX_LOCALE_WIDE_L:
 			max_chr = (pg_wchar) MAX_SIMPLE_CHR;
 			break;
-		case PG_REGEX_LOCALE_1BYTE:
 		case PG_REGEX_LOCALE_1BYTE_L:
 #if MAX_SIMPLE_CHR >= UCHAR_MAX
 			max_chr = (pg_wchar) UCHAR_MAX;
diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c
index 8736ada4be2..68069fcfd3b 100644
--- a/src/backend/utils/adt/formatting.c
+++ b/src/backend/utils/adt/formatting.c
@@ -1665,7 +1665,7 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
 		mylocale = pg_newlocale_from_collation(collid);
 
 #ifdef USE_ICU
-		if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
+		if (mylocale->provider == COLLPROVIDER_ICU)
 		{
 			int32_t		len_uchar;
 			int32_t		len_conv;
@@ -1681,7 +1681,7 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
 		}
 		else
 #endif
-		if (mylocale && mylocale->provider == COLLPROVIDER_BUILTIN)
+		if (mylocale->provider == COLLPROVIDER_BUILTIN)
 		{
 			const char *src = buff;
 			size_t		srclen = nbytes;
@@ -1710,7 +1710,7 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
 		}
 		else
 		{
-			Assert(!mylocale || mylocale->provider == COLLPROVIDER_LIBC);
+			Assert(mylocale->provider == COLLPROVIDER_LIBC);
 
 			if (pg_database_encoding_max_length() > 1)
 			{
@@ -1730,12 +1730,7 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
 				char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
 
 				for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
-				{
-					if (mylocale)
-						workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
-					else
-						workspace[curr_char] = towlower(workspace[curr_char]);
-				}
+					workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
 
 				/*
 				 * Make result large enough; case change might change number
@@ -1761,12 +1756,7 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
 				 * collations you get exactly what the collation says.
 				 */
 				for (p = result; *p; p++)
-				{
-					if (mylocale)
-						*p = tolower_l((unsigned char) *p, mylocale->info.lt);
-					else
-						*p = pg_tolower((unsigned char) *p);
-				}
+					*p = tolower_l((unsigned char) *p, mylocale->info.lt);
 			}
 		}
 	}
@@ -1813,7 +1803,7 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
 		mylocale = pg_newlocale_from_collation(collid);
 
 #ifdef USE_ICU
-		if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
+		if (mylocale->provider == COLLPROVIDER_ICU)
 		{
 			int32_t		len_uchar,
 						len_conv;
@@ -1829,7 +1819,7 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
 		}
 		else
 #endif
-		if (mylocale && mylocale->provider == COLLPROVIDER_BUILTIN)
+		if (mylocale->provider == COLLPROVIDER_BUILTIN)
 		{
 			const char *src = buff;
 			size_t		srclen = nbytes;
@@ -1858,7 +1848,7 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
 		}
 		else
 		{
-			Assert(!mylocale || mylocale->provider == COLLPROVIDER_LIBC);
+			Assert(mylocale->provider == COLLPROVIDER_LIBC);
 
 			if (pg_database_encoding_max_length() > 1)
 			{
@@ -1878,12 +1868,7 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
 				char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
 
 				for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
-				{
-					if (mylocale)
-						workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
-					else
-						workspace[curr_char] = towupper(workspace[curr_char]);
-				}
+					workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
 
 				/*
 				 * Make result large enough; case change might change number
@@ -1909,12 +1894,7 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
 				 * collations you get exactly what the collation says.
 				 */
 				for (p = result; *p; p++)
-				{
-					if (mylocale)
-						*p = toupper_l((unsigned char) *p, mylocale->info.lt);
-					else
-						*p = pg_toupper((unsigned char) *p);
-				}
+					*p = toupper_l((unsigned char) *p, mylocale->info.lt);
 			}
 		}
 	}
@@ -2003,7 +1983,7 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
 		mylocale = pg_newlocale_from_collation(collid);
 
 #ifdef USE_ICU
-		if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
+		if (mylocale->provider == COLLPROVIDER_ICU)
 		{
 			int32_t		len_uchar,
 						len_conv;
@@ -2019,7 +1999,7 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
 		}
 		else
 #endif
-		if (mylocale && mylocale->provider == COLLPROVIDER_BUILTIN)
+		if (mylocale->provider == COLLPROVIDER_BUILTIN)
 		{
 			const char *src = buff;
 			size_t		srclen = nbytes;
@@ -2060,7 +2040,7 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
 		}
 		else
 		{
-			Assert(!mylocale || mylocale->provider == COLLPROVIDER_LIBC);
+			Assert(mylocale->provider == COLLPROVIDER_LIBC);
 
 			if (pg_database_encoding_max_length() > 1)
 			{
@@ -2081,22 +2061,11 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
 
 				for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
 				{
-					if (mylocale)
-					{
-						if (wasalnum)
-							workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
-						else
-							workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
-						wasalnum = iswalnum_l(workspace[curr_char], mylocale->info.lt);
-					}
+					if (wasalnum)
+						workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
 					else
-					{
-						if (wasalnum)
-							workspace[curr_char] = towlower(workspace[curr_char]);
-						else
-							workspace[curr_char] = towupper(workspace[curr_char]);
-						wasalnum = iswalnum(workspace[curr_char]);
-					}
+						workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
+					wasalnum = iswalnum_l(workspace[curr_char], mylocale->info.lt);
 				}
 
 				/*
@@ -2124,22 +2093,11 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
 				 */
 				for (p = result; *p; p++)
 				{
-					if (mylocale)
-					{
-						if (wasalnum)
-							*p = tolower_l((unsigned char) *p, mylocale->info.lt);
-						else
-							*p = toupper_l((unsigned char) *p, mylocale->info.lt);
-						wasalnum = isalnum_l((unsigned char) *p, mylocale->info.lt);
-					}
+					if (wasalnum)
+						*p = tolower_l((unsigned char) *p, mylocale->info.lt);
 					else
-					{
-						if (wasalnum)
-							*p = pg_tolower((unsigned char) *p);
-						else
-							*p = pg_toupper((unsigned char) *p);
-						wasalnum = isalnum((unsigned char) *p);
-					}
+						*p = toupper_l((unsigned char) *p, mylocale->info.lt);
+					wasalnum = isalnum_l((unsigned char) *p, mylocale->info.lt);
 				}
 			}
 		}
diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c
index 57ead66b5aa..0ecc96d48e5 100644
--- a/src/backend/utils/adt/like.c
+++ b/src/backend/utils/adt/like.c
@@ -174,8 +174,7 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation)
 			   *p;
 	int			slen,
 				plen;
-	pg_locale_t locale = 0;
-	bool		locale_is_c = false;
+	pg_locale_t locale;
 
 	if (!OidIsValid(collation))
 	{
@@ -189,10 +188,7 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation)
 				 errhint("Use the COLLATE clause to set the collation explicitly.")));
 	}
 
-	if (lc_ctype_is_c(collation))
-		locale_is_c = true;
-	else
-		locale = pg_newlocale_from_collation(collation);
+	locale = pg_newlocale_from_collation(collation);
 
 	if (!pg_locale_deterministic(locale))
 		ereport(ERROR,
@@ -228,7 +224,7 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation)
 		plen = VARSIZE_ANY_EXHDR(pat);
 		s = VARDATA_ANY(str);
 		slen = VARSIZE_ANY_EXHDR(str);
-		return SB_IMatchText(s, slen, p, plen, locale, locale_is_c);
+		return SB_IMatchText(s, slen, p, plen, locale, locale->ctype_is_c);
 	}
 }
 
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 0295d834cc5..ccd6180a743 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -1169,7 +1169,8 @@ get_iso_localename(const char *winlocname)
 		char	   *hyphen;
 
 		/* Locale names use only ASCII, any conversion locale suffices. */
-		rc = wchar2char(iso_lc_messages, buffer, sizeof(iso_lc_messages), NULL);
+		rc = wchar2char(iso_lc_messages, buffer, sizeof(iso_lc_messages),
+						get_db_env_locale());
 		if (rc == -1 || rc == sizeof(iso_lc_messages))
 			return NULL;
 
@@ -1462,11 +1463,7 @@ make_icu_collator(const char *iculocstr,
 bool
 pg_locale_deterministic(pg_locale_t locale)
 {
-	/* default locale must always be deterministic */
-	if (locale == NULL)
-		return true;
-	else
-		return locale->deterministic;
+	return locale->deterministic;
 }
 
 /*
@@ -1867,7 +1864,7 @@ pg_strncoll_libc_win32_utf8(const char *arg1, size_t len1, const char *arg2,
 	int			r;
 	int			result;
 
-	Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
+	Assert(locale->provider == COLLPROVIDER_LIBC);
 	Assert(GetDatabaseEncoding() == PG_UTF8);
 #ifndef WIN32
 	Assert(false);
@@ -1907,10 +1904,7 @@ pg_strncoll_libc_win32_utf8(const char *arg1, size_t len1, const char *arg2,
 	((LPWSTR) a2p)[r] = 0;
 
 	errno = 0;
-	if (locale)
-		result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, locale->info.lt);
-	else
-		result = wcscoll((LPWSTR) a1p, (LPWSTR) a2p);
+	result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, locale->info.lt);
 	if (result == 2147483647)	/* _NLSCMPERROR; missing from mingw headers */
 		ereport(ERROR,
 				(errmsg("could not compare Unicode strings: %m")));
@@ -1936,7 +1930,7 @@ pg_strcoll_libc(const char *arg1, const char *arg2, pg_locale_t locale)
 {
 	int			result;
 
-	Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
+	Assert(locale->provider == COLLPROVIDER_LIBC);
 #ifdef WIN32
 	if (GetDatabaseEncoding() == PG_UTF8)
 	{
@@ -1947,10 +1941,7 @@ pg_strcoll_libc(const char *arg1, const char *arg2, pg_locale_t locale)
 	}
 	else
 #endif							/* WIN32 */
-	if (locale)
 		result = strcoll_l(arg1, arg2, locale->info.lt);
-	else
-		result = strcoll(arg1, arg2);
 
 	return result;
 }
@@ -1972,7 +1963,7 @@ pg_strncoll_libc(const char *arg1, size_t len1, const char *arg2, size_t len2,
 	char	   *arg2n;
 	int			result;
 
-	Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
+	Assert(locale->provider == COLLPROVIDER_LIBC);
 
 #ifdef WIN32
 	/* check for this case before doing the work for nul-termination */
@@ -2118,7 +2109,7 @@ pg_strcoll(const char *arg1, const char *arg2, pg_locale_t locale)
 {
 	int			result;
 
-	if (!locale || locale->provider == COLLPROVIDER_LIBC)
+	if (locale->provider == COLLPROVIDER_LIBC)
 		result = pg_strcoll_libc(arg1, arg2, locale);
 #ifdef USE_ICU
 	else if (locale->provider == COLLPROVIDER_ICU)
@@ -2154,7 +2145,7 @@ pg_strncoll(const char *arg1, size_t len1, const char *arg2, size_t len2,
 {
 	int			result;
 
-	if (!locale || locale->provider == COLLPROVIDER_LIBC)
+	if (locale->provider == COLLPROVIDER_LIBC)
 		result = pg_strncoll_libc(arg1, len1, arg2, len2, locale);
 #ifdef USE_ICU
 	else if (locale->provider == COLLPROVIDER_ICU)
@@ -2172,13 +2163,10 @@ static size_t
 pg_strxfrm_libc(char *dest, const char *src, size_t destsize,
 				pg_locale_t locale)
 {
-	Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
+	Assert(locale->provider == COLLPROVIDER_LIBC);
 
 #ifdef TRUST_STRXFRM
-	if (locale)
-		return strxfrm_l(dest, src, destsize, locale->info.lt);
-	else
-		return strxfrm(dest, src, destsize);
+	return strxfrm_l(dest, src, destsize, locale->info.lt);
 #else
 	/* shouldn't happen */
 	PGLOCALE_SUPPORT_ERROR(locale->provider);
@@ -2195,7 +2183,7 @@ pg_strnxfrm_libc(char *dest, const char *src, size_t srclen, size_t destsize,
 	size_t		bufsize = srclen + 1;
 	size_t		result;
 
-	Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
+	Assert(locale->provider == COLLPROVIDER_LIBC);
 
 	if (bufsize > TEXTBUFLEN)
 		buf = palloc(bufsize);
@@ -2367,7 +2355,7 @@ pg_strnxfrm_prefix_icu(char *dest, const char *src, int32_t srclen,
 bool
 pg_strxfrm_enabled(pg_locale_t locale)
 {
-	if (!locale || locale->provider == COLLPROVIDER_LIBC)
+	if (locale->provider == COLLPROVIDER_LIBC)
 #ifdef TRUST_STRXFRM
 		return true;
 #else
@@ -2401,7 +2389,7 @@ pg_strxfrm(char *dest, const char *src, size_t destsize, pg_locale_t locale)
 {
 	size_t		result = 0;		/* keep compiler quiet */
 
-	if (!locale || locale->provider == COLLPROVIDER_LIBC)
+	if (locale->provider == COLLPROVIDER_LIBC)
 		result = pg_strxfrm_libc(dest, src, destsize, locale);
 #ifdef USE_ICU
 	else if (locale->provider == COLLPROVIDER_ICU)
@@ -2438,7 +2426,7 @@ pg_strnxfrm(char *dest, size_t destsize, const char *src, size_t srclen,
 {
 	size_t		result = 0;		/* keep compiler quiet */
 
-	if (!locale || locale->provider == COLLPROVIDER_LIBC)
+	if (locale->provider == COLLPROVIDER_LIBC)
 		result = pg_strnxfrm_libc(dest, src, srclen, destsize, locale);
 #ifdef USE_ICU
 	else if (locale->provider == COLLPROVIDER_ICU)
@@ -2458,7 +2446,7 @@ pg_strnxfrm(char *dest, size_t destsize, const char *src, size_t srclen,
 bool
 pg_strxfrm_prefix_enabled(pg_locale_t locale)
 {
-	if (!locale || locale->provider == COLLPROVIDER_LIBC)
+	if (locale->provider == COLLPROVIDER_LIBC)
 		return false;
 	else if (locale->provider == COLLPROVIDER_ICU)
 		return true;
@@ -2488,13 +2476,11 @@ pg_strxfrm_prefix(char *dest, const char *src, size_t destsize,
 {
 	size_t		result = 0;		/* keep compiler quiet */
 
-	if (!locale)
-		PGLOCALE_SUPPORT_ERROR(COLLPROVIDER_LIBC);
 #ifdef USE_ICU
-	else if (locale->provider == COLLPROVIDER_ICU)
+	if (locale->provider == COLLPROVIDER_ICU)
 		result = pg_strnxfrm_prefix_icu(dest, src, -1, destsize, locale);
-#endif
 	else
+#endif
 		PGLOCALE_SUPPORT_ERROR(locale->provider);
 
 	return result;
@@ -2523,13 +2509,11 @@ pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src,
 {
 	size_t		result = 0;		/* keep compiler quiet */
 
-	if (!locale)
-		PGLOCALE_SUPPORT_ERROR(COLLPROVIDER_LIBC);
 #ifdef USE_ICU
-	else if (locale->provider == COLLPROVIDER_ICU)
+	if (locale->provider == COLLPROVIDER_ICU)
 		result = pg_strnxfrm_prefix_icu(dest, src, -1, destsize, locale);
-#endif
 	else
+#endif
 		PGLOCALE_SUPPORT_ERROR(locale->provider);
 
 	return result;
@@ -3086,7 +3070,7 @@ wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale)
 {
 	size_t		result;
 
-	Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
+	Assert(locale->provider == COLLPROVIDER_LIBC);
 
 	if (tolen == 0)
 		return 0;
@@ -3114,12 +3098,6 @@ wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale)
 	}
 	else
 #endif							/* WIN32 */
-	if (locale == (pg_locale_t) 0)
-	{
-		/* Use wcstombs directly for the default locale */
-		result = wcstombs(to, from, tolen);
-	}
-	else
 	{
 		/* Use wcstombs_l for nondefault locales */
 		result = wcstombs_l(to, from, tolen, locale->info.lt);
@@ -3143,7 +3121,7 @@ char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen,
 {
 	size_t		result;
 
-	Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
+	Assert(locale->provider == COLLPROVIDER_LIBC);
 
 	if (tolen == 0)
 		return 0;
@@ -3176,16 +3154,8 @@ char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen,
 		/* mbstowcs requires ending '\0' */
 		char	   *str = pnstrdup(from, fromlen);
 
-		if (locale == (pg_locale_t) 0)
-		{
-			/* Use mbstowcs directly for the default locale */
-			result = mbstowcs(to, str, tolen);
-		}
-		else
-		{
-			/* Use mbstowcs_l for nondefault locales */
-			result = mbstowcs_l(to, str, tolen, locale->info.lt);
-		}
+		/* Use mbstowcs_l with the caller-supplied locale */
+		result = mbstowcs_l(to, str, tolen, locale->info.lt);
 
 		pfree(str);
 	}
diff --git a/src/backend/utils/adt/varchar.c b/src/backend/utils/adt/varchar.c
index 02dfe219f54..829375cd1a3 100644
--- a/src/backend/utils/adt/varchar.c
+++ b/src/backend/utils/adt/varchar.c
@@ -999,7 +999,7 @@ hashbpchar(PG_FUNCTION_ARGS)
 	Oid			collid = PG_GET_COLLATION();
 	char	   *keydata;
 	int			keylen;
-	pg_locale_t mylocale = 0;
+	pg_locale_t mylocale;
 	Datum		result;
 
 	if (!collid)
@@ -1011,8 +1011,7 @@ hashbpchar(PG_FUNCTION_ARGS)
 	keydata = VARDATA_ANY(key);
 	keylen = bcTruelen(key);
 
-	if (!lc_collate_is_c(collid))
-		mylocale = pg_newlocale_from_collation(collid);
+	mylocale = pg_newlocale_from_collation(collid);
 
 	if (pg_locale_deterministic(mylocale))
 	{
@@ -1054,7 +1053,7 @@ hashbpcharextended(PG_FUNCTION_ARGS)
 	Oid			collid = PG_GET_COLLATION();
 	char	   *keydata;
 	int			keylen;
-	pg_locale_t mylocale = 0;
+	pg_locale_t mylocale;
 	Datum		result;
 
 	if (!collid)
@@ -1066,8 +1065,7 @@ hashbpcharextended(PG_FUNCTION_ARGS)
 	keydata = VARDATA_ANY(key);
 	keylen = bcTruelen(key);
 
-	if (!lc_collate_is_c(collid))
-		mylocale = pg_newlocale_from_collation(collid);
+	mylocale = pg_newlocale_from_collation(collid);
 
 	if (pg_locale_deterministic(mylocale))
 	{
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index d2e2e9bbba0..52ab8c43c66 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -1217,12 +1217,11 @@ text_position_setup(text *t1, text *t2, Oid collid, TextPositionState *state)
 {
 	int			len1 = VARSIZE_ANY_EXHDR(t1);
 	int			len2 = VARSIZE_ANY_EXHDR(t2);
-	pg_locale_t mylocale = 0;
+	pg_locale_t mylocale;
 
 	check_collation_set(collid);
 
-	if (!lc_collate_is_c(collid))
-		mylocale = pg_newlocale_from_collation(collid);
+	mylocale = pg_newlocale_from_collation(collid);
 
 	if (!pg_locale_deterministic(mylocale))
 		ereport(ERROR,
@@ -1619,18 +1618,14 @@ Datum
 texteq(PG_FUNCTION_ARGS)
 {
 	Oid			collid = PG_GET_COLLATION();
-	bool		locale_is_c = false;
 	pg_locale_t mylocale = 0;
 	bool		result;
 
 	check_collation_set(collid);
 
-	if (lc_collate_is_c(collid))
-		locale_is_c = true;
-	else
-		mylocale = pg_newlocale_from_collation(collid);
+	mylocale = pg_newlocale_from_collation(collid);
 
-	if (locale_is_c || pg_locale_deterministic(mylocale))
+	if (pg_locale_deterministic(mylocale))
 	{
 		Datum		arg1 = PG_GETARG_DATUM(0);
 		Datum		arg2 = PG_GETARG_DATUM(1);
@@ -1678,18 +1673,14 @@ Datum
 textne(PG_FUNCTION_ARGS)
 {
 	Oid			collid = PG_GET_COLLATION();
-	bool		locale_is_c = false;
-	pg_locale_t mylocale = 0;
+	pg_locale_t mylocale;
 	bool		result;
 
 	check_collation_set(collid);
 
-	if (lc_collate_is_c(collid))
-		locale_is_c = true;
-	else
-		mylocale = pg_newlocale_from_collation(collid);
+	mylocale = pg_newlocale_from_collation(collid);
 
-	if (locale_is_c || pg_locale_deterministic(mylocale))
+	if (pg_locale_deterministic(mylocale))
 	{
 		Datum		arg1 = PG_GETARG_DATUM(0);
 		Datum		arg2 = PG_GETARG_DATUM(1);
@@ -1793,15 +1784,14 @@ text_starts_with(PG_FUNCTION_ARGS)
 	Datum		arg1 = PG_GETARG_DATUM(0);
 	Datum		arg2 = PG_GETARG_DATUM(1);
 	Oid			collid = PG_GET_COLLATION();
-	pg_locale_t mylocale = 0;
+	pg_locale_t mylocale;
 	bool		result;
 	Size		len1,
 				len2;
 
 	check_collation_set(collid);
 
-	if (!lc_collate_is_c(collid))
-		mylocale = pg_newlocale_from_collation(collid);
+	mylocale = pg_newlocale_from_collation(collid);
 
 	if (!pg_locale_deterministic(mylocale))
 		ereport(ERROR,
-- 
2.34.1

From 9bd779fb711c902b33cfd3a5350e0736d7ceb138 Mon Sep 17 00:00:00 2001
From: Jeff Davis <j...@j-davis.com>
Date: Mon, 29 Jul 2024 23:58:29 -0700
Subject: [PATCH v6 3/3] selfuncs.c: use pg_strxfrm() instead of strxfrm().

pg_strxfrm() takes a pg_locale_t, so it works properly with other
providers and does not rely on setlocale().

Discussion: https://postgr.es/m/cfd9eb85-c52a-4ec9-a90e-a5e4de56e...@eisentraut.org
Reviewed-by: Peter Eisentraut, Andreas Karlsson
---
 src/backend/utils/adt/pg_locale.c | 23 ++++++++++++++++-------
 src/backend/utils/adt/selfuncs.c  |  9 +++++++--
 2 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index ccd6180a743..17c55c5ab17 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -2164,14 +2164,7 @@ pg_strxfrm_libc(char *dest, const char *src, size_t destsize,
 				pg_locale_t locale)
 {
 	Assert(locale->provider == COLLPROVIDER_LIBC);
-
-#ifdef TRUST_STRXFRM
 	return strxfrm_l(dest, src, destsize, locale->info.lt);
-#else
-	/* shouldn't happen */
-	PGLOCALE_SUPPORT_ERROR(locale->provider);
-	return 0;					/* keep compiler quiet */
-#endif
 }
 
 static size_t
@@ -2380,6 +2373,10 @@ pg_strxfrm_enabled(pg_locale_t locale)
  * The provided 'src' must be nul-terminated. If 'destsize' is zero, 'dest'
  * may be NULL.
  *
+ * Not all providers support pg_strxfrm() safely. The caller should check
+ * pg_strxfrm_enabled() first, otherwise this function may return wrong
+ * results or an error.
+ *
  * Returns the number of bytes needed to store the transformed string,
  * excluding the terminating nul byte. If the value returned is 'destsize' or
  * greater, the resulting contents of 'dest' are undefined.
@@ -2412,6 +2409,10 @@ pg_strxfrm(char *dest, const char *src, size_t destsize, pg_locale_t locale)
  * 'src' does not need to be nul-terminated. If 'destsize' is zero, 'dest' may
  * be NULL.
  *
+ * Not all providers support pg_strnxfrm() safely. The caller should check
+ * pg_strxfrm_enabled() first, otherwise this function may return wrong
+ * results or an error.
+ *
  * Returns the number of bytes needed to store the transformed string,
  * excluding the terminating nul byte. If the value returned is 'destsize' or
  * greater, the resulting contents of 'dest' are undefined.
@@ -2466,6 +2467,10 @@ pg_strxfrm_prefix_enabled(pg_locale_t locale)
  *
  * The provided 'src' must be nul-terminated.
  *
+ * Not all providers support pg_strxfrm_prefix() safely. The caller should
+ * check pg_strxfrm_prefix_enabled() first, otherwise this function may return
+ * wrong results or an error.
+ *
  * If destsize is not large enough to hold the resulting byte sequence, stores
  * only the first destsize bytes in 'dest'. Returns the number of bytes
  * actually copied to 'dest'.
@@ -2495,6 +2500,10 @@ pg_strxfrm_prefix(char *dest, const char *src, size_t destsize,
  *
  * The provided 'src' must be nul-terminated.
  *
+ * Not all providers support pg_strnxfrm_prefix() safely. The caller should
+ * check pg_strxfrm_prefix_enabled() first, otherwise this function may return
+ * wrong results or an error.
+ *
  * If destsize is not large enough to hold the resulting byte sequence, stores
  * only the first destsize bytes in 'dest'. Returns the number of bytes
  * actually copied to 'dest'.
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 877a62a62ec..673cfd9e703 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -4673,6 +4673,7 @@ convert_string_datum(Datum value, Oid typid, Oid collid, bool *failure)
 
 	if (!lc_collate_is_c(collid))
 	{
+		pg_locale_t mylocale = pg_newlocale_from_collation(collid);
 		char	   *xfrmstr;
 		size_t		xfrmlen;
 		size_t		xfrmlen2 PG_USED_FOR_ASSERTS_ONLY;
@@ -4685,8 +4686,12 @@ convert_string_datum(Datum value, Oid typid, Oid collid, bool *failure)
 		 * bogus data or set an error. This is not really a problem unless it
 		 * crashes since it will only give an estimation error and nothing
 		 * fatal.
+		 *
+		 * XXX: we do not check pg_strxfrm_enabled(). On some platforms and in
+		 * some cases, libc strxfrm() may return the wrong results, but that
+		 * will only lead to an estimation error.
 		 */
-		xfrmlen = strxfrm(NULL, val, 0);
+		xfrmlen = pg_strxfrm(NULL, val, 0, mylocale);
 #ifdef WIN32
 
 		/*
@@ -4698,7 +4703,7 @@ convert_string_datum(Datum value, Oid typid, Oid collid, bool *failure)
 			return val;
 #endif
 		xfrmstr = (char *) palloc(xfrmlen + 1);
-		xfrmlen2 = strxfrm(xfrmstr, val, xfrmlen + 1);
+		xfrmlen2 = pg_strxfrm(xfrmstr, val, xfrmlen + 1, mylocale);
 
 		/*
 		 * Some systems (e.g., glibc) can return a smaller value from the
-- 
2.34.1

Reply via email to