On Fri, 2025-02-07 at 11:19 -0800, Jeff Davis wrote:
> 
> Attached v15. Just a rebase.

Attached v16.

> * commit this on the grounds that it's a desirable code improvement
> and
> the worst-case regression isn't a major concern; or

I plan to commit this soon after branching. There's a general consensus
that enabling multi-lib provider support is a good idea, and turning
the provider behavior into method tables is a prerequisite for that. I
doubt the performance issue will be a serious concern and I don't see a
good way to avoid it.

Regards,
        Jeff Davis

From c9a464483e3341ccb828ca0a6a5fd1b698d56cee Mon Sep 17 00:00:00 2001
From: Jeff Davis <j...@j-davis.com>
Date: Fri, 29 Nov 2024 09:37:43 -0800
Subject: [PATCH v16 1/4] Control ctype behavior internally with a method
 table.

Previously, pattern matching and case mapping behavior branched based
on the provider.

Refactor to use a method table, which is less error-prone and easier
to hook.
---
 src/backend/regex/regc_pg_locale.c        | 429 ++++------------------
 src/backend/utils/adt/like.c              |  22 +-
 src/backend/utils/adt/like_support.c      |   7 +-
 src/backend/utils/adt/pg_locale.c         | 121 +++---
 src/backend/utils/adt/pg_locale_builtin.c | 111 +++++-
 src/backend/utils/adt/pg_locale_icu.c     | 119 +++++-
 src/backend/utils/adt/pg_locale_libc.c    | 331 +++++++++++++++--
 src/include/utils/pg_locale.h             |  53 +++
 src/tools/pgindent/typedefs.list          |   1 -
 9 files changed, 686 insertions(+), 508 deletions(-)

diff --git a/src/backend/regex/regc_pg_locale.c b/src/backend/regex/regc_pg_locale.c
index 78193cfb964..d9eab5357bc 100644
--- a/src/backend/regex/regc_pg_locale.c
+++ b/src/backend/regex/regc_pg_locale.c
@@ -20,58 +20,13 @@
 #include "common/unicode_category.h"
 #include "utils/pg_locale.h"
 
-/*
- * For the libc provider, to provide as much functionality as possible on a
- * variety of platforms without going so far as to implement everything from
- * scratch, we use several implementation strategies depending on the
- * situation:
- *
- * 1. In C/POSIX collations, we use hard-wired code.  We can't depend on
- * the <ctype.h> functions since those will obey LC_CTYPE.  Note that these
- * collations don't give a fig about multibyte characters.
- *
- * 2. When working in UTF8 encoding, we use the <wctype.h> functions.
- * This assumes that every platform uses Unicode codepoints directly
- * as the wchar_t representation of Unicode.  (XXX: ICU makes this assumption
- * even for non-UTF8 encodings, which may be a problem.)  On some platforms
- * wchar_t is only 16 bits wide, so we have to punt for codepoints > 0xFFFF.
- *
- * 3. In all other encodings, we use the <ctype.h> functions for pg_wchar
- * values up to 255, and punt for values above that.  This is 100% correct
- * only in single-byte encodings such as LATINn.  However, non-Unicode
- * multibyte encodings are mostly Far Eastern character sets for which the
- * properties being tested here aren't very relevant for higher code values
- * anyway.  The difficulty with using the <wctype.h> functions with
- * non-Unicode multibyte encodings is that we can have no certainty that
- * the platform's wchar_t representation matches what we do in pg_wchar
- * conversions.
- *
- * As a special case, in the "default" collation, (2) and (3) force ASCII
- * letters to follow ASCII upcase/downcase rules, while in a non-default
- * collation we just let the library functions do what they will.  The case
- * where this matters is treatment of I/i in Turkish, and the behavior is
- * meant to match the upper()/lower() SQL functions.
- *
- * We store the active collation setting in static variables.  In principle
- * it could be passed down to here via the regex library's "struct vars" data
- * structure; but that would require somewhat invasive changes in the regex
- * library, and right now there's no real benefit to be gained from that.
- *
- * NB: the coding here assumes pg_wchar is an unsigned type.
- */
-
-typedef enum
-{
-	PG_REGEX_STRATEGY_C,		/* C locale (encoding independent) */
-	PG_REGEX_STRATEGY_BUILTIN,	/* built-in Unicode semantics */
-	PG_REGEX_STRATEGY_LIBC_WIDE,	/* Use locale_t <wctype.h> functions */
-	PG_REGEX_STRATEGY_LIBC_1BYTE,	/* Use locale_t <ctype.h> functions */
-	PG_REGEX_STRATEGY_ICU,		/* Use ICU uchar.h functions */
-} PG_Locale_Strategy;
-
-static PG_Locale_Strategy pg_regex_strategy;
 static pg_locale_t pg_regex_locale;
 
+static struct pg_locale_struct dummy_c_locale = {
+	.collate_is_c = true,
+	.ctype_is_c = true,
+};
+
 /*
  * Hard-wired character properties for C locale
  */
@@ -228,7 +183,6 @@ void
 pg_set_regex_collation(Oid collation)
 {
 	pg_locale_t locale = 0;
-	PG_Locale_Strategy strategy;
 
 	if (!OidIsValid(collation))
 	{
@@ -249,8 +203,7 @@ pg_set_regex_collation(Oid collation)
 		 * catalog access is available, so we can't call
 		 * pg_newlocale_from_collation().
 		 */
-		strategy = PG_REGEX_STRATEGY_C;
-		locale = 0;
+		locale = &dummy_c_locale;
 	}
 	else
 	{
@@ -267,113 +220,41 @@ pg_set_regex_collation(Oid collation)
 			 * C/POSIX collations use this path regardless of database
 			 * encoding
 			 */
-			strategy = PG_REGEX_STRATEGY_C;
-			locale = 0;
-		}
-		else if (locale->provider == COLLPROVIDER_BUILTIN)
-		{
-			Assert(GetDatabaseEncoding() == PG_UTF8);
-			strategy = PG_REGEX_STRATEGY_BUILTIN;
-		}
-#ifdef USE_ICU
-		else if (locale->provider == COLLPROVIDER_ICU)
-		{
-			strategy = PG_REGEX_STRATEGY_ICU;
-		}
-#endif
-		else
-		{
-			Assert(locale->provider == COLLPROVIDER_LIBC);
-			if (GetDatabaseEncoding() == PG_UTF8)
-				strategy = PG_REGEX_STRATEGY_LIBC_WIDE;
-			else
-				strategy = PG_REGEX_STRATEGY_LIBC_1BYTE;
+			locale = &dummy_c_locale;
 		}
 	}
 
-	pg_regex_strategy = strategy;
 	pg_regex_locale = locale;
 }
 
 static int
 pg_wc_isdigit(pg_wchar c)
 {
-	switch (pg_regex_strategy)
-	{
-		case PG_REGEX_STRATEGY_C:
-			return (c <= (pg_wchar) 127 &&
-					(pg_char_properties[c] & PG_ISDIGIT));
-		case PG_REGEX_STRATEGY_BUILTIN:
-			return pg_u_isdigit(c, !pg_regex_locale->info.builtin.casemap_full);
-		case PG_REGEX_STRATEGY_LIBC_WIDE:
-			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return iswdigit_l((wint_t) c, pg_regex_locale->info.lt);
-			/* FALL THRU */
-		case PG_REGEX_STRATEGY_LIBC_1BYTE:
-			return (c <= (pg_wchar) UCHAR_MAX &&
-					isdigit_l((unsigned char) c, pg_regex_locale->info.lt));
-			break;
-		case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
-			return u_isdigit(c);
-#endif
-			break;
-	}
-	return 0;					/* can't get here, but keep compiler quiet */
+	if (pg_regex_locale->ctype_is_c)
+		return (c <= (pg_wchar) 127 &&
+				(pg_char_properties[c] & PG_ISDIGIT));
+	else
+		return pg_regex_locale->ctype->wc_isdigit(c, pg_regex_locale);
 }
 
 static int
 pg_wc_isalpha(pg_wchar c)
 {
-	switch (pg_regex_strategy)
-	{
-		case PG_REGEX_STRATEGY_C:
-			return (c <= (pg_wchar) 127 &&
-					(pg_char_properties[c] & PG_ISALPHA));
-		case PG_REGEX_STRATEGY_BUILTIN:
-			return pg_u_isalpha(c);
-		case PG_REGEX_STRATEGY_LIBC_WIDE:
-			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return iswalpha_l((wint_t) c, pg_regex_locale->info.lt);
-			/* FALL THRU */
-		case PG_REGEX_STRATEGY_LIBC_1BYTE:
-			return (c <= (pg_wchar) UCHAR_MAX &&
-					isalpha_l((unsigned char) c, pg_regex_locale->info.lt));
-			break;
-		case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
-			return u_isalpha(c);
-#endif
-			break;
-	}
-	return 0;					/* can't get here, but keep compiler quiet */
+	if (pg_regex_locale->ctype_is_c)
+		return (c <= (pg_wchar) 127 &&
+				(pg_char_properties[c] & PG_ISALPHA));
+	else
+		return pg_regex_locale->ctype->wc_isalpha(c, pg_regex_locale);
 }
 
 static int
 pg_wc_isalnum(pg_wchar c)
 {
-	switch (pg_regex_strategy)
-	{
-		case PG_REGEX_STRATEGY_C:
-			return (c <= (pg_wchar) 127 &&
-					(pg_char_properties[c] & PG_ISALNUM));
-		case PG_REGEX_STRATEGY_BUILTIN:
-			return pg_u_isalnum(c, !pg_regex_locale->info.builtin.casemap_full);
-		case PG_REGEX_STRATEGY_LIBC_WIDE:
-			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return iswalnum_l((wint_t) c, pg_regex_locale->info.lt);
-			/* FALL THRU */
-		case PG_REGEX_STRATEGY_LIBC_1BYTE:
-			return (c <= (pg_wchar) UCHAR_MAX &&
-					isalnum_l((unsigned char) c, pg_regex_locale->info.lt));
-			break;
-		case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
-			return u_isalnum(c);
-#endif
-			break;
-	}
-	return 0;					/* can't get here, but keep compiler quiet */
+	if (pg_regex_locale->ctype_is_c)
+		return (c <= (pg_wchar) 127 &&
+				(pg_char_properties[c] & PG_ISALNUM));
+	else
+		return pg_regex_locale->ctype->wc_isalnum(c, pg_regex_locale);
 }
 
 static int
@@ -388,231 +269,87 @@ pg_wc_isword(pg_wchar c)
 static int
 pg_wc_isupper(pg_wchar c)
 {
-	switch (pg_regex_strategy)
-	{
-		case PG_REGEX_STRATEGY_C:
-			return (c <= (pg_wchar) 127 &&
-					(pg_char_properties[c] & PG_ISUPPER));
-		case PG_REGEX_STRATEGY_BUILTIN:
-			return pg_u_isupper(c);
-		case PG_REGEX_STRATEGY_LIBC_WIDE:
-			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return iswupper_l((wint_t) c, pg_regex_locale->info.lt);
-			/* FALL THRU */
-		case PG_REGEX_STRATEGY_LIBC_1BYTE:
-			return (c <= (pg_wchar) UCHAR_MAX &&
-					isupper_l((unsigned char) c, pg_regex_locale->info.lt));
-			break;
-		case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
-			return u_isupper(c);
-#endif
-			break;
-	}
-	return 0;					/* can't get here, but keep compiler quiet */
+	if (pg_regex_locale->ctype_is_c)
+		return (c <= (pg_wchar) 127 &&
+				(pg_char_properties[c] & PG_ISUPPER));
+	else
+		return pg_regex_locale->ctype->wc_isupper(c, pg_regex_locale);
 }
 
 static int
 pg_wc_islower(pg_wchar c)
 {
-	switch (pg_regex_strategy)
-	{
-		case PG_REGEX_STRATEGY_C:
-			return (c <= (pg_wchar) 127 &&
-					(pg_char_properties[c] & PG_ISLOWER));
-		case PG_REGEX_STRATEGY_BUILTIN:
-			return pg_u_islower(c);
-		case PG_REGEX_STRATEGY_LIBC_WIDE:
-			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return iswlower_l((wint_t) c, pg_regex_locale->info.lt);
-			/* FALL THRU */
-		case PG_REGEX_STRATEGY_LIBC_1BYTE:
-			return (c <= (pg_wchar) UCHAR_MAX &&
-					islower_l((unsigned char) c, pg_regex_locale->info.lt));
-			break;
-		case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
-			return u_islower(c);
-#endif
-			break;
-	}
-	return 0;					/* can't get here, but keep compiler quiet */
+	if (pg_regex_locale->ctype_is_c)
+		return (c <= (pg_wchar) 127 &&
+				(pg_char_properties[c] & PG_ISLOWER));
+	else
+		return pg_regex_locale->ctype->wc_islower(c, pg_regex_locale);
 }
 
 static int
 pg_wc_isgraph(pg_wchar c)
 {
-	switch (pg_regex_strategy)
-	{
-		case PG_REGEX_STRATEGY_C:
-			return (c <= (pg_wchar) 127 &&
-					(pg_char_properties[c] & PG_ISGRAPH));
-		case PG_REGEX_STRATEGY_BUILTIN:
-			return pg_u_isgraph(c);
-		case PG_REGEX_STRATEGY_LIBC_WIDE:
-			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return iswgraph_l((wint_t) c, pg_regex_locale->info.lt);
-			/* FALL THRU */
-		case PG_REGEX_STRATEGY_LIBC_1BYTE:
-			return (c <= (pg_wchar) UCHAR_MAX &&
-					isgraph_l((unsigned char) c, pg_regex_locale->info.lt));
-			break;
-		case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
-			return u_isgraph(c);
-#endif
-			break;
-	}
-	return 0;					/* can't get here, but keep compiler quiet */
+	if (pg_regex_locale->ctype_is_c)
+		return (c <= (pg_wchar) 127 &&
+				(pg_char_properties[c] & PG_ISGRAPH));
+	else
+		return pg_regex_locale->ctype->wc_isgraph(c, pg_regex_locale);
 }
 
 static int
 pg_wc_isprint(pg_wchar c)
 {
-	switch (pg_regex_strategy)
-	{
-		case PG_REGEX_STRATEGY_C:
-			return (c <= (pg_wchar) 127 &&
-					(pg_char_properties[c] & PG_ISPRINT));
-		case PG_REGEX_STRATEGY_BUILTIN:
-			return pg_u_isprint(c);
-		case PG_REGEX_STRATEGY_LIBC_WIDE:
-			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return iswprint_l((wint_t) c, pg_regex_locale->info.lt);
-			/* FALL THRU */
-		case PG_REGEX_STRATEGY_LIBC_1BYTE:
-			return (c <= (pg_wchar) UCHAR_MAX &&
-					isprint_l((unsigned char) c, pg_regex_locale->info.lt));
-			break;
-		case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
-			return u_isprint(c);
-#endif
-			break;
-	}
-	return 0;					/* can't get here, but keep compiler quiet */
+	if (pg_regex_locale->ctype_is_c)
+		return (c <= (pg_wchar) 127 &&
+				(pg_char_properties[c] & PG_ISPRINT));
+	else
+		return pg_regex_locale->ctype->wc_isprint(c, pg_regex_locale);
 }
 
 static int
 pg_wc_ispunct(pg_wchar c)
 {
-	switch (pg_regex_strategy)
-	{
-		case PG_REGEX_STRATEGY_C:
-			return (c <= (pg_wchar) 127 &&
-					(pg_char_properties[c] & PG_ISPUNCT));
-		case PG_REGEX_STRATEGY_BUILTIN:
-			return pg_u_ispunct(c, !pg_regex_locale->info.builtin.casemap_full);
-		case PG_REGEX_STRATEGY_LIBC_WIDE:
-			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return iswpunct_l((wint_t) c, pg_regex_locale->info.lt);
-			/* FALL THRU */
-		case PG_REGEX_STRATEGY_LIBC_1BYTE:
-			return (c <= (pg_wchar) UCHAR_MAX &&
-					ispunct_l((unsigned char) c, pg_regex_locale->info.lt));
-			break;
-		case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
-			return u_ispunct(c);
-#endif
-			break;
-	}
-	return 0;					/* can't get here, but keep compiler quiet */
+	if (pg_regex_locale->ctype_is_c)
+		return (c <= (pg_wchar) 127 &&
+				(pg_char_properties[c] & PG_ISPUNCT));
+	else
+		return pg_regex_locale->ctype->wc_ispunct(c, pg_regex_locale);
 }
 
 static int
 pg_wc_isspace(pg_wchar c)
 {
-	switch (pg_regex_strategy)
-	{
-		case PG_REGEX_STRATEGY_C:
-			return (c <= (pg_wchar) 127 &&
-					(pg_char_properties[c] & PG_ISSPACE));
-		case PG_REGEX_STRATEGY_BUILTIN:
-			return pg_u_isspace(c);
-		case PG_REGEX_STRATEGY_LIBC_WIDE:
-			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return iswspace_l((wint_t) c, pg_regex_locale->info.lt);
-			/* FALL THRU */
-		case PG_REGEX_STRATEGY_LIBC_1BYTE:
-			return (c <= (pg_wchar) UCHAR_MAX &&
-					isspace_l((unsigned char) c, pg_regex_locale->info.lt));
-			break;
-		case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
-			return u_isspace(c);
-#endif
-			break;
-	}
-	return 0;					/* can't get here, but keep compiler quiet */
+	if (pg_regex_locale->ctype_is_c)
+		return (c <= (pg_wchar) 127 &&
+				(pg_char_properties[c] & PG_ISSPACE));
+	else
+		return pg_regex_locale->ctype->wc_isspace(c, pg_regex_locale);
 }
 
 static pg_wchar
 pg_wc_toupper(pg_wchar c)
 {
-	switch (pg_regex_strategy)
+	if (pg_regex_locale->ctype_is_c)
 	{
-		case PG_REGEX_STRATEGY_C:
-			if (c <= (pg_wchar) 127)
-				return pg_ascii_toupper((unsigned char) c);
-			return c;
-		case PG_REGEX_STRATEGY_BUILTIN:
-			return unicode_uppercase_simple(c);
-		case PG_REGEX_STRATEGY_LIBC_WIDE:
-			/* force C behavior for ASCII characters, per comments above */
-			if (pg_regex_locale->is_default && c <= (pg_wchar) 127)
-				return pg_ascii_toupper((unsigned char) c);
-			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return towupper_l((wint_t) c, pg_regex_locale->info.lt);
-			/* FALL THRU */
-		case PG_REGEX_STRATEGY_LIBC_1BYTE:
-			/* force C behavior for ASCII characters, per comments above */
-			if (pg_regex_locale->is_default && c <= (pg_wchar) 127)
-				return pg_ascii_toupper((unsigned char) c);
-			if (c <= (pg_wchar) UCHAR_MAX)
-				return toupper_l((unsigned char) c, pg_regex_locale->info.lt);
-			return c;
-		case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
-			return u_toupper(c);
-#endif
-			break;
+		if (c <= (pg_wchar) 127)
+			return pg_ascii_toupper((unsigned char) c);
+		return c;
 	}
-	return 0;					/* can't get here, but keep compiler quiet */
+	else
+		return pg_regex_locale->ctype->wc_toupper(c, pg_regex_locale);
 }
 
 static pg_wchar
 pg_wc_tolower(pg_wchar c)
 {
-	switch (pg_regex_strategy)
+	if (pg_regex_locale->ctype_is_c)
 	{
-		case PG_REGEX_STRATEGY_C:
-			if (c <= (pg_wchar) 127)
-				return pg_ascii_tolower((unsigned char) c);
-			return c;
-		case PG_REGEX_STRATEGY_BUILTIN:
-			return unicode_lowercase_simple(c);
-		case PG_REGEX_STRATEGY_LIBC_WIDE:
-			/* force C behavior for ASCII characters, per comments above */
-			if (pg_regex_locale->is_default && c <= (pg_wchar) 127)
-				return pg_ascii_tolower((unsigned char) c);
-			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return towlower_l((wint_t) c, pg_regex_locale->info.lt);
-			/* FALL THRU */
-		case PG_REGEX_STRATEGY_LIBC_1BYTE:
-			/* force C behavior for ASCII characters, per comments above */
-			if (pg_regex_locale->is_default && c <= (pg_wchar) 127)
-				return pg_ascii_tolower((unsigned char) c);
-			if (c <= (pg_wchar) UCHAR_MAX)
-				return tolower_l((unsigned char) c, pg_regex_locale->info.lt);
-			return c;
-		case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
-			return u_tolower(c);
-#endif
-			break;
+		if (c <= (pg_wchar) 127)
+			return pg_ascii_tolower((unsigned char) c);
+		return c;
 	}
-	return 0;					/* can't get here, but keep compiler quiet */
+	else
+		return pg_regex_locale->ctype->wc_tolower(c, pg_regex_locale);
 }
 
 
@@ -738,37 +475,25 @@ pg_ctype_get_cache(pg_wc_probefunc probefunc, int cclasscode)
 	 * would always be true for production values of MAX_SIMPLE_CHR, but it's
 	 * useful to allow it to be small for testing purposes.)
 	 */
-	switch (pg_regex_strategy)
+	if (pg_regex_locale->ctype_is_c)
 	{
-		case PG_REGEX_STRATEGY_C:
 #if MAX_SIMPLE_CHR >= 127
-			max_chr = (pg_wchar) 127;
-			pcc->cv.cclasscode = -1;
+		max_chr = (pg_wchar) 127;
+		pcc->cv.cclasscode = -1;
 #else
-			max_chr = (pg_wchar) MAX_SIMPLE_CHR;
+		max_chr = (pg_wchar) MAX_SIMPLE_CHR;
 #endif
-			break;
-		case PG_REGEX_STRATEGY_BUILTIN:
-			max_chr = (pg_wchar) MAX_SIMPLE_CHR;
-			break;
-		case PG_REGEX_STRATEGY_LIBC_WIDE:
-			max_chr = (pg_wchar) MAX_SIMPLE_CHR;
-			break;
-		case PG_REGEX_STRATEGY_LIBC_1BYTE:
-#if MAX_SIMPLE_CHR >= UCHAR_MAX
-			max_chr = (pg_wchar) UCHAR_MAX;
+	}
+	else
+	{
+		if (pg_regex_locale->ctype->max_chr != 0 &&
+			pg_regex_locale->ctype->max_chr <= MAX_SIMPLE_CHR)
+		{
+			max_chr = pg_regex_locale->ctype->max_chr;
 			pcc->cv.cclasscode = -1;
-#else
-			max_chr = (pg_wchar) MAX_SIMPLE_CHR;
-#endif
-			break;
-		case PG_REGEX_STRATEGY_ICU:
+		}
+		else
 			max_chr = (pg_wchar) MAX_SIMPLE_CHR;
-			break;
-		default:
-			Assert(false);
-			max_chr = 0;		/* can't get here, but keep compiler quiet */
-			break;
 	}
 
 	/*
diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c
index 7f4cf614585..4216ac17f43 100644
--- a/src/backend/utils/adt/like.c
+++ b/src/backend/utils/adt/like.c
@@ -98,7 +98,7 @@ SB_lower_char(unsigned char c, pg_locale_t locale)
 	else if (locale->is_default)
 		return pg_tolower(c);
 	else
-		return tolower_l(c, locale->info.lt);
+		return char_tolower(c, locale);
 }
 
 
@@ -209,7 +209,17 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation)
 	 * way.
 	 */
 
-	if (pg_database_encoding_max_length() > 1 || (locale->provider == COLLPROVIDER_ICU))
+	if (locale->ctype_is_c ||
+		(char_tolower_enabled(locale) &&
+		 pg_database_encoding_max_length() == 1))
+	{
+		p = VARDATA_ANY(pat);
+		plen = VARSIZE_ANY_EXHDR(pat);
+		s = VARDATA_ANY(str);
+		slen = VARSIZE_ANY_EXHDR(str);
+		return SB_IMatchText(s, slen, p, plen, locale);
+	}
+	else
 	{
 		pat = DatumGetTextPP(DirectFunctionCall1Coll(lower, collation,
 													 PointerGetDatum(pat)));
@@ -224,14 +234,6 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation)
 		else
 			return MB_MatchText(s, slen, p, plen, 0);
 	}
-	else
-	{
-		p = VARDATA_ANY(pat);
-		plen = VARSIZE_ANY_EXHDR(pat);
-		s = VARDATA_ANY(str);
-		slen = VARSIZE_ANY_EXHDR(str);
-		return SB_IMatchText(s, slen, p, plen, locale);
-	}
 }
 
 /*
diff --git a/src/backend/utils/adt/like_support.c b/src/backend/utils/adt/like_support.c
index 8fdc677371f..999f23f86d5 100644
--- a/src/backend/utils/adt/like_support.c
+++ b/src/backend/utils/adt/like_support.c
@@ -1495,13 +1495,8 @@ pattern_char_isalpha(char c, bool is_multibyte,
 {
 	if (locale->ctype_is_c)
 		return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
-	else if (is_multibyte && IS_HIGHBIT_SET(c))
-		return true;
-	else if (locale->provider != COLLPROVIDER_LIBC)
-		return IS_HIGHBIT_SET(c) ||
-			(c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
 	else
-		return isalpha_l((unsigned char) c, locale->info.lt);
+		return char_is_cased(c, locale);
 }
 
 
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index f5e31c433a0..451ac4e2d9b 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -80,31 +80,6 @@ extern pg_locale_t create_pg_locale_icu(Oid collid, MemoryContext context);
 extern pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context);
 extern char *get_collation_actual_version_libc(const char *collcollate);
 
-extern size_t strlower_builtin(char *dst, size_t dstsize, const char *src,
-							   ssize_t srclen, pg_locale_t locale);
-extern size_t strtitle_builtin(char *dst, size_t dstsize, const char *src,
-							   ssize_t srclen, pg_locale_t locale);
-extern size_t strupper_builtin(char *dst, size_t dstsize, const char *src,
-							   ssize_t srclen, pg_locale_t locale);
-extern size_t strfold_builtin(char *dst, size_t dstsize, const char *src,
-							  ssize_t srclen, pg_locale_t locale);
-
-extern size_t strlower_icu(char *dst, size_t dstsize, const char *src,
-						   ssize_t srclen, pg_locale_t locale);
-extern size_t strtitle_icu(char *dst, size_t dstsize, const char *src,
-						   ssize_t srclen, pg_locale_t locale);
-extern size_t strupper_icu(char *dst, size_t dstsize, const char *src,
-						   ssize_t srclen, pg_locale_t locale);
-extern size_t strfold_icu(char *dst, size_t dstsize, const char *src,
-						  ssize_t srclen, pg_locale_t locale);
-
-extern size_t strlower_libc(char *dst, size_t dstsize, const char *src,
-							ssize_t srclen, pg_locale_t locale);
-extern size_t strtitle_libc(char *dst, size_t dstsize, const char *src,
-							ssize_t srclen, pg_locale_t locale);
-extern size_t strupper_libc(char *dst, size_t dstsize, const char *src,
-							ssize_t srclen, pg_locale_t locale);
-
 /* GUC settings */
 char	   *locale_messages;
 char	   *locale_monetary;
@@ -1093,6 +1068,9 @@ create_pg_locale(Oid collid, MemoryContext context)
 	Assert((result->collate_is_c && result->collate == NULL) ||
 		   (!result->collate_is_c && result->collate != NULL));
 
+	Assert((result->ctype_is_c && result->ctype == NULL) ||
+		   (!result->ctype_is_c && result->ctype != NULL));
+
 	datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collversion,
 							&isnull);
 	if (!isnull)
@@ -1257,77 +1235,31 @@ size_t
 pg_strlower(char *dst, size_t dstsize, const char *src, ssize_t srclen,
 			pg_locale_t locale)
 {
-	if (locale->provider == COLLPROVIDER_BUILTIN)
-		return strlower_builtin(dst, dstsize, src, srclen, locale);
-#ifdef USE_ICU
-	else if (locale->provider == COLLPROVIDER_ICU)
-		return strlower_icu(dst, dstsize, src, srclen, locale);
-#endif
-	else if (locale->provider == COLLPROVIDER_LIBC)
-		return strlower_libc(dst, dstsize, src, srclen, locale);
-	else
-		/* shouldn't happen */
-		PGLOCALE_SUPPORT_ERROR(locale->provider);
-
-	return 0;					/* keep compiler quiet */
+	return locale->ctype->strlower(dst, dstsize, src, srclen, locale);
 }
 
 size_t
 pg_strtitle(char *dst, size_t dstsize, const char *src, ssize_t srclen,
 			pg_locale_t locale)
 {
-	if (locale->provider == COLLPROVIDER_BUILTIN)
-		return strtitle_builtin(dst, dstsize, src, srclen, locale);
-#ifdef USE_ICU
-	else if (locale->provider == COLLPROVIDER_ICU)
-		return strtitle_icu(dst, dstsize, src, srclen, locale);
-#endif
-	else if (locale->provider == COLLPROVIDER_LIBC)
-		return strtitle_libc(dst, dstsize, src, srclen, locale);
-	else
-		/* shouldn't happen */
-		PGLOCALE_SUPPORT_ERROR(locale->provider);
-
-	return 0;					/* keep compiler quiet */
+	return locale->ctype->strtitle(dst, dstsize, src, srclen, locale);
 }
 
 size_t
 pg_strupper(char *dst, size_t dstsize, const char *src, ssize_t srclen,
 			pg_locale_t locale)
 {
-	if (locale->provider == COLLPROVIDER_BUILTIN)
-		return strupper_builtin(dst, dstsize, src, srclen, locale);
-#ifdef USE_ICU
-	else if (locale->provider == COLLPROVIDER_ICU)
-		return strupper_icu(dst, dstsize, src, srclen, locale);
-#endif
-	else if (locale->provider == COLLPROVIDER_LIBC)
-		return strupper_libc(dst, dstsize, src, srclen, locale);
-	else
-		/* shouldn't happen */
-		PGLOCALE_SUPPORT_ERROR(locale->provider);
-
-	return 0;					/* keep compiler quiet */
+	return locale->ctype->strupper(dst, dstsize, src, srclen, locale);
 }
 
 size_t
 pg_strfold(char *dst, size_t dstsize, const char *src, ssize_t srclen,
 		   pg_locale_t locale)
 {
-	if (locale->provider == COLLPROVIDER_BUILTIN)
-		return strfold_builtin(dst, dstsize, src, srclen, locale);
-#ifdef USE_ICU
-	else if (locale->provider == COLLPROVIDER_ICU)
-		return strfold_icu(dst, dstsize, src, srclen, locale);
-#endif
-	/* for libc, just use strlower */
-	else if (locale->provider == COLLPROVIDER_LIBC)
-		return strlower_libc(dst, dstsize, src, srclen, locale);
+	if (locale->ctype->strfold)
+		return locale->ctype->strfold(dst, dstsize, src, srclen, locale);
 	else
-		/* shouldn't happen */
-		PGLOCALE_SUPPORT_ERROR(locale->provider);
-
-	return 0;					/* keep compiler quiet */
+		return locale->ctype->strlower(dst, dstsize, src, srclen, locale);
 }
 
 /*
@@ -1464,6 +1396,41 @@ pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src,
 	return locale->collate->strnxfrm_prefix(dest, destsize, src, srclen, locale);
 }
 
+/*
+ * char_is_cased()
+ *
+ * Fuzzy test of whether the given char is case-varying or not. The argument
+ * is a single byte, so in a multibyte encoding, just assume any non-ASCII
+ * char is case-varying.
+ */
+bool
+char_is_cased(char ch, pg_locale_t locale)
+{
+	return locale->ctype->char_is_cased(ch, locale);
+}
+
+/*
+ * char_tolower_enabled()
+ *
+ * Does the provider support char_tolower()?
+ */
+bool
+char_tolower_enabled(pg_locale_t locale)
+{
+	return (locale->ctype->char_tolower != NULL);
+}
+
+/*
+ * char_tolower()
+ *
+ * Convert char (single-byte encoding) to lowercase.
+ */
+char
+char_tolower(unsigned char ch, pg_locale_t locale)
+{
+	return locale->ctype->char_tolower(ch, locale);
+}
+
 /*
  * Return required encoding ID for the given locale, or -1 if any encoding is
  * valid for the locale.
diff --git a/src/backend/utils/adt/pg_locale_builtin.c b/src/backend/utils/adt/pg_locale_builtin.c
index f51768830cd..6fe090708ff 100644
--- a/src/backend/utils/adt/pg_locale_builtin.c
+++ b/src/backend/utils/adt/pg_locale_builtin.c
@@ -25,15 +25,6 @@
 extern pg_locale_t create_pg_locale_builtin(Oid collid,
 											MemoryContext context);
 extern char *get_collation_actual_version_builtin(const char *collcollate);
-extern size_t strlower_builtin(char *dest, size_t destsize, const char *src,
-							   ssize_t srclen, pg_locale_t locale);
-extern size_t strtitle_builtin(char *dest, size_t destsize, const char *src,
-							   ssize_t srclen, pg_locale_t locale);
-extern size_t strupper_builtin(char *dest, size_t destsize, const char *src,
-							   ssize_t srclen, pg_locale_t locale);
-extern size_t strfold_builtin(char *dest, size_t destsize, const char *src,
-							  ssize_t srclen, pg_locale_t locale);
-
 
 struct WordBoundaryState
 {
@@ -77,7 +68,7 @@ initcap_wbnext(void *state)
 	return wbstate->len;
 }
 
-size_t
+static size_t
 strlower_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
 				 pg_locale_t locale)
 {
@@ -85,7 +76,7 @@ strlower_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
 							locale->info.builtin.casemap_full);
 }
 
-size_t
+static size_t
 strtitle_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
 				 pg_locale_t locale)
 {
@@ -103,7 +94,7 @@ strtitle_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
 							initcap_wbnext, &wbstate);
 }
 
-size_t
+static size_t
 strupper_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
 				 pg_locale_t locale)
 {
@@ -111,7 +102,7 @@ strupper_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
 							locale->info.builtin.casemap_full);
 }
 
-size_t
+static size_t
 strfold_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
 				pg_locale_t locale)
 {
@@ -119,6 +110,98 @@ strfold_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
 						   locale->info.builtin.casemap_full);
 }
 
+static bool
+wc_isdigit_builtin(pg_wchar wc, pg_locale_t locale)
+{
+	return pg_u_isdigit(wc, !locale->info.builtin.casemap_full);
+}
+
+static bool
+wc_isalpha_builtin(pg_wchar wc, pg_locale_t locale)
+{
+	return pg_u_isalpha(wc);
+}
+
+static bool
+wc_isalnum_builtin(pg_wchar wc, pg_locale_t locale)
+{
+	return pg_u_isalnum(wc, !locale->info.builtin.casemap_full);
+}
+
+static bool
+wc_isupper_builtin(pg_wchar wc, pg_locale_t locale)
+{
+	return pg_u_isupper(wc);
+}
+
+static bool
+wc_islower_builtin(pg_wchar wc, pg_locale_t locale)
+{
+	return pg_u_islower(wc);
+}
+
+static bool
+wc_isgraph_builtin(pg_wchar wc, pg_locale_t locale)
+{
+	return pg_u_isgraph(wc);
+}
+
+static bool
+wc_isprint_builtin(pg_wchar wc, pg_locale_t locale)
+{
+	return pg_u_isprint(wc);
+}
+
+static bool
+wc_ispunct_builtin(pg_wchar wc, pg_locale_t locale)
+{
+	return pg_u_ispunct(wc, !locale->info.builtin.casemap_full);
+}
+
+static bool
+wc_isspace_builtin(pg_wchar wc, pg_locale_t locale)
+{
+	return pg_u_isspace(wc);
+}
+
+static bool
+char_is_cased_builtin(char ch, pg_locale_t locale)
+{
+	return IS_HIGHBIT_SET(ch) ||
+		(ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
+}
+
+static pg_wchar
+wc_toupper_builtin(pg_wchar wc, pg_locale_t locale)
+{
+	return unicode_uppercase_simple(wc);
+}
+
+static pg_wchar
+wc_tolower_builtin(pg_wchar wc, pg_locale_t locale)
+{
+	return unicode_lowercase_simple(wc);
+}
+
+static const struct ctype_methods ctype_methods_builtin = {
+	.strlower = strlower_builtin,
+	.strtitle = strtitle_builtin,
+	.strupper = strupper_builtin,
+	.strfold = strfold_builtin,
+	.wc_isdigit = wc_isdigit_builtin,
+	.wc_isalpha = wc_isalpha_builtin,
+	.wc_isalnum = wc_isalnum_builtin,
+	.wc_isupper = wc_isupper_builtin,
+	.wc_islower = wc_islower_builtin,
+	.wc_isgraph = wc_isgraph_builtin,
+	.wc_isprint = wc_isprint_builtin,
+	.wc_ispunct = wc_ispunct_builtin,
+	.wc_isspace = wc_isspace_builtin,
+	.char_is_cased = char_is_cased_builtin,
+	.wc_tolower = wc_tolower_builtin,
+	.wc_toupper = wc_toupper_builtin,
+};
+
 pg_locale_t
 create_pg_locale_builtin(Oid collid, MemoryContext context)
 {
@@ -162,6 +245,8 @@ create_pg_locale_builtin(Oid collid, MemoryContext context)
 	result->deterministic = true;
 	result->collate_is_c = true;
 	result->ctype_is_c = (strcmp(locstr, "C") == 0);
+	if (!result->ctype_is_c)
+		result->ctype = &ctype_methods_builtin;
 
 	return result;
 }
diff --git a/src/backend/utils/adt/pg_locale_icu.c b/src/backend/utils/adt/pg_locale_icu.c
index a32c32a0744..1f4ee2d1990 100644
--- a/src/backend/utils/adt/pg_locale_icu.c
+++ b/src/backend/utils/adt/pg_locale_icu.c
@@ -48,19 +48,22 @@
 #define		TEXTBUFLEN			1024
 
 extern pg_locale_t create_pg_locale_icu(Oid collid, MemoryContext context);
-extern size_t strlower_icu(char *dest, size_t destsize, const char *src,
-						   ssize_t srclen, pg_locale_t locale);
-extern size_t strtitle_icu(char *dest, size_t destsize, const char *src,
-						   ssize_t srclen, pg_locale_t locale);
-extern size_t strupper_icu(char *dest, size_t destsize, const char *src,
-						   ssize_t srclen, pg_locale_t locale);
-extern size_t strfold_icu(char *dest, size_t destsize, const char *src,
-						  ssize_t srclen, pg_locale_t locale);
 
 #ifdef USE_ICU
 
 extern UCollator *pg_ucol_open(const char *loc_str);
 
+static size_t strlower_icu(char *dest, size_t destsize, const char *src,
+						   ssize_t srclen, pg_locale_t locale);
+static size_t strtitle_icu(char *dest, size_t destsize, const char *src,
+						   ssize_t srclen, pg_locale_t locale);
+static size_t strupper_icu(char *dest, size_t destsize, const char *src,
+						   ssize_t srclen, pg_locale_t locale);
+static size_t strfold_icu(char *dest, size_t destsize, const char *src,
+						  ssize_t srclen, pg_locale_t locale);
+static int	strncoll_icu(const char *arg1, ssize_t len1,
+						 const char *arg2, ssize_t len2,
+						 pg_locale_t locale);
 static size_t strnxfrm_icu(char *dest, size_t destsize,
 						   const char *src, ssize_t srclen,
 						   pg_locale_t locale);
@@ -118,6 +121,25 @@ static int32_t u_strFoldCase_default(UChar *dest, int32_t destCapacity,
 									 const char *locale,
 									 UErrorCode *pErrorCode);
 
+static bool
+char_is_cased_icu(char ch, pg_locale_t locale)
+{
+	return IS_HIGHBIT_SET(ch) ||
+		(ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
+}
+
+static pg_wchar
+toupper_icu(pg_wchar wc, pg_locale_t locale)
+{
+	return u_toupper(wc);
+}
+
+static pg_wchar
+tolower_icu(pg_wchar wc, pg_locale_t locale)
+{
+	return u_tolower(wc);
+}
+
 static const struct collate_methods collate_methods_icu = {
 	.strncoll = strncoll_icu,
 	.strnxfrm = strnxfrm_icu,
@@ -136,6 +158,78 @@ static const struct collate_methods collate_methods_icu_utf8 = {
 	.strxfrm_is_safe = true,
 };
 
+static bool
+wc_isdigit_icu(pg_wchar wc, pg_locale_t locale)
+{
+	return u_isdigit(wc);
+}
+
+static bool
+wc_isalpha_icu(pg_wchar wc, pg_locale_t locale)
+{
+	return u_isalpha(wc);
+}
+
+static bool
+wc_isalnum_icu(pg_wchar wc, pg_locale_t locale)
+{
+	return u_isalnum(wc);
+}
+
+static bool
+wc_isupper_icu(pg_wchar wc, pg_locale_t locale)
+{
+	return u_isupper(wc);
+}
+
+static bool
+wc_islower_icu(pg_wchar wc, pg_locale_t locale)
+{
+	return u_islower(wc);
+}
+
+static bool
+wc_isgraph_icu(pg_wchar wc, pg_locale_t locale)
+{
+	return u_isgraph(wc);
+}
+
+static bool
+wc_isprint_icu(pg_wchar wc, pg_locale_t locale)
+{
+	return u_isprint(wc);
+}
+
+static bool
+wc_ispunct_icu(pg_wchar wc, pg_locale_t locale)
+{
+	return u_ispunct(wc);
+}
+
+static bool
+wc_isspace_icu(pg_wchar wc, pg_locale_t locale)
+{
+	return u_isspace(wc);
+}
+
+static const struct ctype_methods ctype_methods_icu = {
+	.strlower = strlower_icu,
+	.strtitle = strtitle_icu,
+	.strupper = strupper_icu,
+	.strfold = strfold_icu,
+	.wc_isdigit = wc_isdigit_icu,
+	.wc_isalpha = wc_isalpha_icu,
+	.wc_isalnum = wc_isalnum_icu,
+	.wc_isupper = wc_isupper_icu,
+	.wc_islower = wc_islower_icu,
+	.wc_isgraph = wc_isgraph_icu,
+	.wc_isprint = wc_isprint_icu,
+	.wc_ispunct = wc_ispunct_icu,
+	.wc_isspace = wc_isspace_icu,
+	.char_is_cased = char_is_cased_icu,
+	.wc_toupper = toupper_icu,
+	.wc_tolower = tolower_icu,
+};
 #endif
 
 pg_locale_t
@@ -206,6 +300,7 @@ create_pg_locale_icu(Oid collid, MemoryContext context)
 		result->collate = &collate_methods_icu_utf8;
 	else
 		result->collate = &collate_methods_icu;
+	result->ctype = &ctype_methods_icu;
 
 	return result;
 #else
@@ -379,7 +474,7 @@ make_icu_collator(const char *iculocstr, const char *icurules)
 	}
 }
 
-size_t
+static size_t
 strlower_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
 			 pg_locale_t locale)
 {
@@ -399,7 +494,7 @@ strlower_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
 	return result_len;
 }
 
-size_t
+static size_t
 strtitle_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
 			 pg_locale_t locale)
 {
@@ -419,7 +514,7 @@ strtitle_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
 	return result_len;
 }
 
-size_t
+static size_t
 strupper_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
 			 pg_locale_t locale)
 {
@@ -439,7 +534,7 @@ strupper_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
 	return result_len;
 }
 
-size_t
+static size_t
 strfold_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
 			pg_locale_t locale)
 {
diff --git a/src/backend/utils/adt/pg_locale_libc.c b/src/backend/utils/adt/pg_locale_libc.c
index 199857e22db..be714db5283 100644
--- a/src/backend/utils/adt/pg_locale_libc.c
+++ b/src/backend/utils/adt/pg_locale_libc.c
@@ -33,6 +33,46 @@
 #include <shlwapi.h>
 #endif
 
+/*
+ * For the libc provider, to provide as much functionality as possible on a
+ * variety of platforms without going so far as to implement everything from
+ * scratch, we use several implementation strategies depending on the
+ * situation:
+ *
+ * 1. In C/POSIX collations, we use hard-wired code.  We can't depend on
+ * the <ctype.h> functions since those will obey LC_CTYPE.  Note that these
+ * collations don't give a fig about multibyte characters.
+ *
+ * 2. When working in UTF8 encoding, we use the <wctype.h> functions.
+ * This assumes that every platform uses Unicode codepoints directly
+ * as the wchar_t representation of Unicode.  (XXX: ICU makes this assumption
+ * even for non-UTF8 encodings, which may be a problem.)  On some platforms
+ * wchar_t is only 16 bits wide, so we have to punt for codepoints > 0xFFFF.
+ *
+ * 3. In all other encodings, we use the <ctype.h> functions for pg_wchar
+ * values up to 255, and punt for values above that.  This is 100% correct
+ * only in single-byte encodings such as LATINn.  However, non-Unicode
+ * multibyte encodings are mostly Far Eastern character sets for which the
+ * properties being tested here aren't very relevant for higher code values
+ * anyway.  The difficulty with using the <wctype.h> functions with
+ * non-Unicode multibyte encodings is that we can have no certainty that
+ * the platform's wchar_t representation matches what we do in pg_wchar
+ * conversions.
+ *
+ * As a special case, in the "default" collation, (2) and (3) force ASCII
+ * letters to follow ASCII upcase/downcase rules, while in a non-default
+ * collation we just let the library functions do what they will.  The case
+ * where this matters is treatment of I/i in Turkish, and the behavior is
+ * meant to match the upper()/lower() SQL functions.
+ *
+ * We store the active collation setting in static variables.  In principle
+ * it could be passed down to here via the regex library's "struct vars" data
+ * structure; but that would require somewhat invasive changes in the regex
+ * library, and right now there's no real benefit to be gained from that.
+ *
+ * NB: the coding here assumes pg_wchar is an unsigned type.
+ */
+
 /*
  * Size of stack buffer to use for string transformations, used to avoid heap
  * allocations in typical cases. This should be large enough that most strings
@@ -43,13 +83,6 @@
 
 extern pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context);
 
-extern size_t strlower_libc(char *dst, size_t dstsize, const char *src,
-							ssize_t srclen, pg_locale_t locale);
-extern size_t strtitle_libc(char *dst, size_t dstsize, const char *src,
-							ssize_t srclen, pg_locale_t locale);
-extern size_t strupper_libc(char *dst, size_t dstsize, const char *src,
-							ssize_t srclen, pg_locale_t locale);
-
 static int	strncoll_libc(const char *arg1, ssize_t len1,
 						  const char *arg2, ssize_t len2,
 						  pg_locale_t locale);
@@ -85,6 +118,251 @@ static size_t strupper_libc_mb(char *dest, size_t destsize,
 							   const char *src, ssize_t srclen,
 							   pg_locale_t locale);
 
+static bool
+wc_isdigit_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+	return isdigit_l((unsigned char) wc, locale->info.lt);
+}
+
+static bool
+wc_isalpha_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+	return isalpha_l((unsigned char) wc, locale->info.lt);
+}
+
+static bool
+wc_isalnum_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+	return isalnum_l((unsigned char) wc, locale->info.lt);
+}
+
+static bool
+wc_isupper_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+	return isupper_l((unsigned char) wc, locale->info.lt);
+}
+
+static bool
+wc_islower_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+	return islower_l((unsigned char) wc, locale->info.lt);
+}
+
+static bool
+wc_isgraph_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+	return isgraph_l((unsigned char) wc, locale->info.lt);
+}
+
+static bool
+wc_isprint_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+	return isprint_l((unsigned char) wc, locale->info.lt);
+}
+
+static bool
+wc_ispunct_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+	return ispunct_l((unsigned char) wc, locale->info.lt);
+}
+
+static bool
+wc_isspace_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+	return isspace_l((unsigned char) wc, locale->info.lt);
+}
+
+static bool
+wc_isdigit_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+	return iswdigit_l((wint_t) wc, locale->info.lt);
+}
+
+static bool
+wc_isalpha_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+	return iswalpha_l((wint_t) wc, locale->info.lt);
+}
+
+static bool
+wc_isalnum_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+	return iswalnum_l((wint_t) wc, locale->info.lt);
+}
+
+static bool
+wc_isupper_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+	return iswupper_l((wint_t) wc, locale->info.lt);
+}
+
+static bool
+wc_islower_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+	return iswlower_l((wint_t) wc, locale->info.lt);
+}
+
+static bool
+wc_isgraph_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+	return iswgraph_l((wint_t) wc, locale->info.lt);
+}
+
+static bool
+wc_isprint_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+	return iswprint_l((wint_t) wc, locale->info.lt);
+}
+
+static bool
+wc_ispunct_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+	return iswpunct_l((wint_t) wc, locale->info.lt);
+}
+
+static bool
+wc_isspace_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+	return iswspace_l((wint_t) wc, locale->info.lt);
+}
+
+static char
+char_tolower_libc(unsigned char ch, pg_locale_t locale)
+{
+	Assert(pg_database_encoding_max_length() == 1);
+	return tolower_l(ch, locale->info.lt);
+}
+
+static bool
+char_is_cased_libc(char ch, pg_locale_t locale)
+{
+	bool		is_multibyte = pg_database_encoding_max_length() > 1;
+
+	if (is_multibyte && IS_HIGHBIT_SET(ch))
+		return true;
+	else
+		return isalpha_l((unsigned char) ch, locale->info.lt);
+}
+
+static pg_wchar
+toupper_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+	Assert(GetDatabaseEncoding() != PG_UTF8);
+
+	/* force C behavior for ASCII characters, per comments above */
+	if (locale->is_default && wc <= (pg_wchar) 127)
+		return pg_ascii_toupper((unsigned char) wc);
+	if (wc <= (pg_wchar) UCHAR_MAX)
+		return toupper_l((unsigned char) wc, locale->info.lt);
+	else
+		return wc;
+}
+
+static pg_wchar
+toupper_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+	Assert(GetDatabaseEncoding() == PG_UTF8);
+
+	/* force C behavior for ASCII characters, per comments above */
+	if (locale->is_default && wc <= (pg_wchar) 127)
+		return pg_ascii_toupper((unsigned char) wc);
+	if (sizeof(wchar_t) >= 4 || wc <= (pg_wchar) 0xFFFF)
+		return towupper_l((wint_t) wc, locale->info.lt);
+	else
+		return wc;
+}
+
+static pg_wchar
+tolower_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+	Assert(GetDatabaseEncoding() != PG_UTF8);
+
+	/* force C behavior for ASCII characters, per comments above */
+	if (locale->is_default && wc <= (pg_wchar) 127)
+		return pg_ascii_tolower((unsigned char) wc);
+	if (wc <= (pg_wchar) UCHAR_MAX)
+		return tolower_l((unsigned char) wc, locale->info.lt);
+	else
+		return wc;
+}
+
+static pg_wchar
+tolower_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+	Assert(GetDatabaseEncoding() == PG_UTF8);
+
+	/* force C behavior for ASCII characters, per comments above */
+	if (locale->is_default && wc <= (pg_wchar) 127)
+		return pg_ascii_tolower((unsigned char) wc);
+	if (sizeof(wchar_t) >= 4 || wc <= (pg_wchar) 0xFFFF)
+		return towlower_l((wint_t) wc, locale->info.lt);
+	else
+		return wc;
+}
+
+static const struct ctype_methods ctype_methods_libc_sb = {
+	.strlower = strlower_libc_sb,
+	.strtitle = strtitle_libc_sb,
+	.strupper = strupper_libc_sb,
+	.wc_isdigit = wc_isdigit_libc_sb,
+	.wc_isalpha = wc_isalpha_libc_sb,
+	.wc_isalnum = wc_isalnum_libc_sb,
+	.wc_isupper = wc_isupper_libc_sb,
+	.wc_islower = wc_islower_libc_sb,
+	.wc_isgraph = wc_isgraph_libc_sb,
+	.wc_isprint = wc_isprint_libc_sb,
+	.wc_ispunct = wc_ispunct_libc_sb,
+	.wc_isspace = wc_isspace_libc_sb,
+	.char_is_cased = char_is_cased_libc,
+	.char_tolower = char_tolower_libc,
+	.wc_toupper = toupper_libc_sb,
+	.wc_tolower = tolower_libc_sb,
+	.max_chr = UCHAR_MAX,
+};
+
+/*
+ * Non-UTF8 multibyte encodings use multibyte semantics for case mapping, but
+ * single-byte semantics for pattern matching.
+ */
+static const struct ctype_methods ctype_methods_libc_other_mb = {
+	.strlower = strlower_libc_mb,
+	.strtitle = strtitle_libc_mb,
+	.strupper = strupper_libc_mb,
+	.wc_isdigit = wc_isdigit_libc_sb,
+	.wc_isalpha = wc_isalpha_libc_sb,
+	.wc_isalnum = wc_isalnum_libc_sb,
+	.wc_isupper = wc_isupper_libc_sb,
+	.wc_islower = wc_islower_libc_sb,
+	.wc_isgraph = wc_isgraph_libc_sb,
+	.wc_isprint = wc_isprint_libc_sb,
+	.wc_ispunct = wc_ispunct_libc_sb,
+	.wc_isspace = wc_isspace_libc_sb,
+	.char_is_cased = char_is_cased_libc,
+	.char_tolower = char_tolower_libc,
+	.wc_toupper = toupper_libc_sb,
+	.wc_tolower = tolower_libc_sb,
+	.max_chr = UCHAR_MAX,
+};
+
+static const struct ctype_methods ctype_methods_libc_utf8 = {
+	.strlower = strlower_libc_mb,
+	.strtitle = strtitle_libc_mb,
+	.strupper = strupper_libc_mb,
+	.wc_isdigit = wc_isdigit_libc_mb,
+	.wc_isalpha = wc_isalpha_libc_mb,
+	.wc_isalnum = wc_isalnum_libc_mb,
+	.wc_isupper = wc_isupper_libc_mb,
+	.wc_islower = wc_islower_libc_mb,
+	.wc_isgraph = wc_isgraph_libc_mb,
+	.wc_isprint = wc_isprint_libc_mb,
+	.wc_ispunct = wc_ispunct_libc_mb,
+	.wc_isspace = wc_isspace_libc_mb,
+	.char_is_cased = char_is_cased_libc,
+	.char_tolower = char_tolower_libc,
+	.wc_toupper = toupper_libc_mb,
+	.wc_tolower = tolower_libc_mb,
+};
+
 static const struct collate_methods collate_methods_libc = {
 	.strncoll = strncoll_libc,
 	.strnxfrm = strnxfrm_libc,
@@ -119,36 +397,6 @@ static const struct collate_methods collate_methods_libc_win32_utf8 = {
 };
 #endif
 
-size_t
-strlower_libc(char *dst, size_t dstsize, const char *src,
-			  ssize_t srclen, pg_locale_t locale)
-{
-	if (pg_database_encoding_max_length() > 1)
-		return strlower_libc_mb(dst, dstsize, src, srclen, locale);
-	else
-		return strlower_libc_sb(dst, dstsize, src, srclen, locale);
-}
-
-size_t
-strtitle_libc(char *dst, size_t dstsize, const char *src,
-			  ssize_t srclen, pg_locale_t locale)
-{
-	if (pg_database_encoding_max_length() > 1)
-		return strtitle_libc_mb(dst, dstsize, src, srclen, locale);
-	else
-		return strtitle_libc_sb(dst, dstsize, src, srclen, locale);
-}
-
-size_t
-strupper_libc(char *dst, size_t dstsize, const char *src,
-			  ssize_t srclen, pg_locale_t locale)
-{
-	if (pg_database_encoding_max_length() > 1)
-		return strupper_libc_mb(dst, dstsize, src, srclen, locale);
-	else
-		return strupper_libc_sb(dst, dstsize, src, srclen, locale);
-}
-
 static size_t
 strlower_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
 				 pg_locale_t locale)
@@ -481,6 +729,15 @@ create_pg_locale_libc(Oid collid, MemoryContext context)
 #endif
 			result->collate = &collate_methods_libc;
 	}
+	if (!result->ctype_is_c)
+	{
+		if (GetDatabaseEncoding() == PG_UTF8)
+			result->ctype = &ctype_methods_libc_utf8;
+		else if (pg_database_encoding_max_length() > 1)
+			result->ctype = &ctype_methods_libc_other_mb;
+		else
+			result->ctype = &ctype_methods_libc_sb;
+	}
 
 	return result;
 }
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index 7b8cbf58d2c..0f497fa8ce2 100644
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -12,6 +12,8 @@
 #ifndef _PG_LOCALE_
 #define _PG_LOCALE_
 
+#include "mb/pg_wchar.h"
+
 #ifdef USE_ICU
 #include <unicode/ucol.h>
 #endif
@@ -77,6 +79,52 @@ struct collate_methods
 	bool		strxfrm_is_safe;
 };
 
+struct ctype_methods
+{
+	/* case mapping: LOWER()/INITCAP()/UPPER() */
+	size_t		(*strlower) (char *dest, size_t destsize,
+							 const char *src, ssize_t srclen,
+							 pg_locale_t locale);
+	size_t		(*strtitle) (char *dest, size_t destsize,
+							 const char *src, ssize_t srclen,
+							 pg_locale_t locale);
+	size_t		(*strupper) (char *dest, size_t destsize,
+							 const char *src, ssize_t srclen,
+							 pg_locale_t locale);
+	size_t		(*strfold) (char *dest, size_t destsize,
+							const char *src, ssize_t srclen,
+							pg_locale_t locale);
+
+	/* required */
+	bool		(*wc_isdigit) (pg_wchar wc, pg_locale_t locale);
+	bool		(*wc_isalpha) (pg_wchar wc, pg_locale_t locale);
+	bool		(*wc_isalnum) (pg_wchar wc, pg_locale_t locale);
+	bool		(*wc_isupper) (pg_wchar wc, pg_locale_t locale);
+	bool		(*wc_islower) (pg_wchar wc, pg_locale_t locale);
+	bool		(*wc_isgraph) (pg_wchar wc, pg_locale_t locale);
+	bool		(*wc_isprint) (pg_wchar wc, pg_locale_t locale);
+	bool		(*wc_ispunct) (pg_wchar wc, pg_locale_t locale);
+	bool		(*wc_isspace) (pg_wchar wc, pg_locale_t locale);
+	pg_wchar	(*wc_toupper) (pg_wchar wc, pg_locale_t locale);
+	pg_wchar	(*wc_tolower) (pg_wchar wc, pg_locale_t locale);
+
+	/* required */
+	bool		(*char_is_cased) (char ch, pg_locale_t locale);
+
+	/*
+	 * Optional. If defined, will only be called for single-byte encodings. If
+	 * not defined, or if the encoding is multibyte, will fall back to
+	 * pg_strlower().
+	 */
+	char		(*char_tolower) (unsigned char ch, pg_locale_t locale);
+
+	/*
+	 * For regex and pattern matching efficiency, the maximum char value
+	 * supported by the above methods. If zero, limit is set by regex code.
+	 */
+	pg_wchar	max_chr;
+};
+
 /*
  * We use a discriminated union to hold either a locale_t or an ICU collator.
  * pg_locale_t is occasionally checked for truth, so make it a pointer.
@@ -102,6 +150,7 @@ struct pg_locale_struct
 	bool		is_default;
 
 	const struct collate_methods *collate;	/* NULL if collate_is_c */
+	const struct ctype_methods *ctype;	/* NULL if ctype_is_c */
 
 	union
 	{
@@ -125,6 +174,10 @@ extern void init_database_collation(void);
 extern pg_locale_t pg_newlocale_from_collation(Oid collid);
 
 extern char *get_collation_actual_version(char collprovider, const char *collcollate);
+
+extern bool char_is_cased(char ch, pg_locale_t locale);
+extern bool char_tolower_enabled(pg_locale_t locale);
+extern char char_tolower(unsigned char ch, pg_locale_t locale);
 extern size_t pg_strlower(char *dst, size_t dstsize,
 						  const char *src, ssize_t srclen,
 						  pg_locale_t locale);
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index a8346cda633..eea8f1eee6b 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -1872,7 +1872,6 @@ PGTargetServerType
 PGTernaryBool
 PGTransactionStatusType
 PGVerbosity
-PG_Locale_Strategy
 PG_Lock_Status
 PG_init_t
 PGauthData
-- 
2.43.0

From a95b771572d625b8bbcdf60955f11076b633267d Mon Sep 17 00:00:00 2001
From: Jeff Davis <j...@j-davis.com>
Date: Mon, 7 Oct 2024 12:51:27 -0700
Subject: [PATCH v16 2/4] Remove provider field from pg_locale_t.

The behavior of pg_locale_t is entirely specified by methods, so a
separate provider field is no longer necessary.
---
 src/backend/utils/adt/pg_locale_builtin.c |  1 -
 src/backend/utils/adt/pg_locale_icu.c     | 11 -----------
 src/backend/utils/adt/pg_locale_libc.c    |  6 ------
 src/include/utils/pg_locale.h             |  1 -
 4 files changed, 19 deletions(-)

diff --git a/src/backend/utils/adt/pg_locale_builtin.c b/src/backend/utils/adt/pg_locale_builtin.c
index 6fe090708ff..3fe41c880a6 100644
--- a/src/backend/utils/adt/pg_locale_builtin.c
+++ b/src/backend/utils/adt/pg_locale_builtin.c
@@ -241,7 +241,6 @@ create_pg_locale_builtin(Oid collid, MemoryContext context)
 
 	result->info.builtin.locale = MemoryContextStrdup(context, locstr);
 	result->info.builtin.casemap_full = (strcmp(locstr, "PG_UNICODE_FAST") == 0);
-	result->provider = COLLPROVIDER_BUILTIN;
 	result->deterministic = true;
 	result->collate_is_c = true;
 	result->ctype_is_c = (strcmp(locstr, "C") == 0);
diff --git a/src/backend/utils/adt/pg_locale_icu.c b/src/backend/utils/adt/pg_locale_icu.c
index 1f4ee2d1990..96741e08269 100644
--- a/src/backend/utils/adt/pg_locale_icu.c
+++ b/src/backend/utils/adt/pg_locale_icu.c
@@ -292,7 +292,6 @@ create_pg_locale_icu(Oid collid, MemoryContext context)
 	result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct));
 	result->info.icu.locale = MemoryContextStrdup(context, iculocstr);
 	result->info.icu.ucol = collator;
-	result->provider = COLLPROVIDER_ICU;
 	result->deterministic = deterministic;
 	result->collate_is_c = false;
 	result->ctype_is_c = false;
@@ -569,8 +568,6 @@ strncoll_icu_utf8(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2
 	int			result;
 	UErrorCode	status;
 
-	Assert(locale->provider == COLLPROVIDER_ICU);
-
 	Assert(GetDatabaseEncoding() == PG_UTF8);
 
 	status = U_ZERO_ERROR;
@@ -598,8 +595,6 @@ strnxfrm_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
 	size_t		uchar_bsize;
 	Size		result_bsize;
 
-	Assert(locale->provider == COLLPROVIDER_ICU);
-
 	init_icu_converter();
 
 	ulen = uchar_length(icu_converter, src, srclen);
@@ -644,8 +639,6 @@ strnxfrm_prefix_icu_utf8(char *dest, size_t destsize,
 	uint32_t	state[2];
 	UErrorCode	status;
 
-	Assert(locale->provider == COLLPROVIDER_ICU);
-
 	Assert(GetDatabaseEncoding() == PG_UTF8);
 
 	uiter_setUTF8(&iter, src, srclen);
@@ -844,8 +837,6 @@ strncoll_icu(const char *arg1, ssize_t len1,
 			   *uchar2;
 	int			result;
 
-	Assert(locale->provider == COLLPROVIDER_ICU);
-
 	/* if encoding is UTF8, use more efficient strncoll_icu_utf8 */
 #ifdef HAVE_UCOL_STRCOLLUTF8
 	Assert(GetDatabaseEncoding() != PG_UTF8);
@@ -894,8 +885,6 @@ strnxfrm_prefix_icu(char *dest, size_t destsize,
 	size_t		uchar_bsize;
 	Size		result_bsize;
 
-	Assert(locale->provider == COLLPROVIDER_ICU);
-
 	/* if encoding is UTF8, use more efficient strnxfrm_prefix_icu_utf8 */
 	Assert(GetDatabaseEncoding() != PG_UTF8);
 
diff --git a/src/backend/utils/adt/pg_locale_libc.c b/src/backend/utils/adt/pg_locale_libc.c
index be714db5283..e9f9fc1e369 100644
--- a/src/backend/utils/adt/pg_locale_libc.c
+++ b/src/backend/utils/adt/pg_locale_libc.c
@@ -713,7 +713,6 @@ create_pg_locale_libc(Oid collid, MemoryContext context)
 	loc = make_libc_collator(collate, ctype);
 
 	result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct));
-	result->provider = COLLPROVIDER_LIBC;
 	result->deterministic = true;
 	result->collate_is_c = (strcmp(collate, "C") == 0) ||
 		(strcmp(collate, "POSIX") == 0);
@@ -833,8 +832,6 @@ strncoll_libc(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2,
 	const char *arg2n;
 	int			result;
 
-	Assert(locale->provider == COLLPROVIDER_LIBC);
-
 	if (bufsize1 + bufsize2 > TEXTBUFLEN)
 		buf = palloc(bufsize1 + bufsize2);
 
@@ -889,8 +886,6 @@ strnxfrm_libc(char *dest, size_t destsize, const char *src, ssize_t srclen,
 	size_t		bufsize = srclen + 1;
 	size_t		result;
 
-	Assert(locale->provider == COLLPROVIDER_LIBC);
-
 	if (srclen == -1)
 		return strxfrm_l(dest, src, destsize, locale->info.lt);
 
@@ -999,7 +994,6 @@ strncoll_libc_win32_utf8(const char *arg1, ssize_t len1, const char *arg2,
 	int			r;
 	int			result;
 
-	Assert(locale->provider == COLLPROVIDER_LIBC);
 	Assert(GetDatabaseEncoding() == PG_UTF8);
 
 	if (len1 == -1)
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index 0f497fa8ce2..44ff60a25b4 100644
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -143,7 +143,6 @@ struct ctype_methods
  */
 struct pg_locale_struct
 {
-	char		provider;
 	bool		deterministic;
 	bool		collate_is_c;
 	bool		ctype_is_c;
-- 
2.43.0

From 3c25c6dff7842ad0f5c2cdb880e3d745742ec3d7 Mon Sep 17 00:00:00 2001
From: Jeff Davis <j...@j-davis.com>
Date: Mon, 7 Oct 2024 13:36:44 -0700
Subject: [PATCH v16 3/4] Make provider data in pg_locale_t an opaque pointer.

---
 src/backend/utils/adt/pg_locale_builtin.c |  55 +++++--
 src/backend/utils/adt/pg_locale_icu.c     |  40 ++++--
 src/backend/utils/adt/pg_locale_libc.c    | 167 +++++++++++++++-------
 src/include/utils/pg_locale.h             |  17 +--
 4 files changed, 196 insertions(+), 83 deletions(-)

diff --git a/src/backend/utils/adt/pg_locale_builtin.c b/src/backend/utils/adt/pg_locale_builtin.c
index 3fe41c880a6..2d095527e96 100644
--- a/src/backend/utils/adt/pg_locale_builtin.c
+++ b/src/backend/utils/adt/pg_locale_builtin.c
@@ -26,6 +26,12 @@ extern pg_locale_t create_pg_locale_builtin(Oid collid,
 											MemoryContext context);
 extern char *get_collation_actual_version_builtin(const char *collcollate);
 
+struct builtin_provider
+{
+	const char *locale;
+	bool		casemap_full;
+};
+
 struct WordBoundaryState
 {
 	const char *str;
@@ -72,25 +78,30 @@ static size_t
 strlower_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
 				 pg_locale_t locale)
 {
+	struct builtin_provider *builtin;
+
+	builtin = (struct builtin_provider *) locale->provider_data;
+
 	return unicode_strlower(dest, destsize, src, srclen,
-							locale->info.builtin.casemap_full);
+							builtin->casemap_full);
 }
 
 static size_t
 strtitle_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
 				 pg_locale_t locale)
 {
+	struct builtin_provider *builtin = (struct builtin_provider *) locale->provider_data;
 	struct WordBoundaryState wbstate = {
 		.str = src,
 		.len = srclen,
 		.offset = 0,
-		.posix = !locale->info.builtin.casemap_full,
+		.posix = !builtin->casemap_full,
 		.init = false,
 		.prev_alnum = false,
 	};
 
 	return unicode_strtitle(dest, destsize, src, srclen,
-							locale->info.builtin.casemap_full,
+							builtin->casemap_full,
 							initcap_wbnext, &wbstate);
 }
 
@@ -98,22 +109,34 @@ static size_t
 strupper_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
 				 pg_locale_t locale)
 {
+	struct builtin_provider *builtin;
+
+	builtin = (struct builtin_provider *) locale->provider_data;
+
 	return unicode_strupper(dest, destsize, src, srclen,
-							locale->info.builtin.casemap_full);
+							builtin->casemap_full);
 }
 
 static size_t
 strfold_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
 				pg_locale_t locale)
 {
+	struct builtin_provider *builtin;
+
+	builtin = (struct builtin_provider *) locale->provider_data;
+
 	return unicode_strfold(dest, destsize, src, srclen,
-						   locale->info.builtin.casemap_full);
+						   builtin->casemap_full);
 }
 
 static bool
 wc_isdigit_builtin(pg_wchar wc, pg_locale_t locale)
 {
-	return pg_u_isdigit(wc, !locale->info.builtin.casemap_full);
+	struct builtin_provider *builtin;
+
+	builtin = (struct builtin_provider *) locale->provider_data;
+
+	return pg_u_isdigit(wc, !builtin->casemap_full);
 }
 
 static bool
@@ -125,7 +148,11 @@ wc_isalpha_builtin(pg_wchar wc, pg_locale_t locale)
 static bool
 wc_isalnum_builtin(pg_wchar wc, pg_locale_t locale)
 {
-	return pg_u_isalnum(wc, !locale->info.builtin.casemap_full);
+	struct builtin_provider *builtin;
+
+	builtin = (struct builtin_provider *) locale->provider_data;
+
+	return pg_u_isalnum(wc, !builtin->casemap_full);
 }
 
 static bool
@@ -155,7 +182,11 @@ wc_isprint_builtin(pg_wchar wc, pg_locale_t locale)
 static bool
 wc_ispunct_builtin(pg_wchar wc, pg_locale_t locale)
 {
-	return pg_u_ispunct(wc, !locale->info.builtin.casemap_full);
+	struct builtin_provider *builtin;
+
+	builtin = (struct builtin_provider *) locale->provider_data;
+
+	return pg_u_ispunct(wc, !builtin->casemap_full);
 }
 
 static bool
@@ -206,6 +237,7 @@ pg_locale_t
 create_pg_locale_builtin(Oid collid, MemoryContext context)
 {
 	const char *locstr;
+	struct builtin_provider *builtin;
 	pg_locale_t result;
 
 	if (collid == DEFAULT_COLLATION_OID)
@@ -239,8 +271,11 @@ create_pg_locale_builtin(Oid collid, MemoryContext context)
 
 	result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct));
 
-	result->info.builtin.locale = MemoryContextStrdup(context, locstr);
-	result->info.builtin.casemap_full = (strcmp(locstr, "PG_UNICODE_FAST") == 0);
+	builtin = MemoryContextAllocZero(context, sizeof(struct builtin_provider));
+	builtin->locale = MemoryContextStrdup(context, locstr);
+	builtin->casemap_full = (strcmp(locstr, "PG_UNICODE_FAST") == 0);
+	result->provider_data = (void *) builtin;
+
 	result->deterministic = true;
 	result->collate_is_c = true;
 	result->ctype_is_c = (strcmp(locstr, "C") == 0);
diff --git a/src/backend/utils/adt/pg_locale_icu.c b/src/backend/utils/adt/pg_locale_icu.c
index 96741e08269..497be95a869 100644
--- a/src/backend/utils/adt/pg_locale_icu.c
+++ b/src/backend/utils/adt/pg_locale_icu.c
@@ -51,6 +51,12 @@ extern pg_locale_t create_pg_locale_icu(Oid collid, MemoryContext context);
 
 #ifdef USE_ICU
 
+struct icu_provider
+{
+	const char *locale;
+	UCollator  *ucol;
+};
+
 extern UCollator *pg_ucol_open(const char *loc_str);
 
 static size_t strlower_icu(char *dest, size_t destsize, const char *src,
@@ -239,6 +245,7 @@ create_pg_locale_icu(Oid collid, MemoryContext context)
 	bool		deterministic;
 	const char *iculocstr;
 	const char *icurules = NULL;
+	struct icu_provider *icu;
 	UCollator  *collator;
 	pg_locale_t result;
 
@@ -290,8 +297,12 @@ create_pg_locale_icu(Oid collid, MemoryContext context)
 	collator = make_icu_collator(iculocstr, icurules);
 
 	result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct));
-	result->info.icu.locale = MemoryContextStrdup(context, iculocstr);
-	result->info.icu.ucol = collator;
+
+	icu = MemoryContextAllocZero(context, sizeof(struct icu_provider));
+	icu->locale = MemoryContextStrdup(context, iculocstr);
+	icu->ucol = collator;
+	result->provider_data = (void *) icu;
+
 	result->deterministic = deterministic;
 	result->collate_is_c = false;
 	result->ctype_is_c = false;
@@ -567,11 +578,12 @@ strncoll_icu_utf8(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2
 {
 	int			result;
 	UErrorCode	status;
+	struct icu_provider *icu = (struct icu_provider *) locale->provider_data;
 
 	Assert(GetDatabaseEncoding() == PG_UTF8);
 
 	status = U_ZERO_ERROR;
-	result = ucol_strcollUTF8(locale->info.icu.ucol,
+	result = ucol_strcollUTF8(icu->ucol,
 							  arg1, len1,
 							  arg2, len2,
 							  &status);
@@ -595,6 +607,8 @@ strnxfrm_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
 	size_t		uchar_bsize;
 	Size		result_bsize;
 
+	struct icu_provider *icu = (struct icu_provider *) locale->provider_data;
+
 	init_icu_converter();
 
 	ulen = uchar_length(icu_converter, src, srclen);
@@ -608,7 +622,7 @@ strnxfrm_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
 
 	ulen = uchar_convert(icu_converter, uchar, ulen + 1, src, srclen);
 
-	result_bsize = ucol_getSortKey(locale->info.icu.ucol,
+	result_bsize = ucol_getSortKey(icu->ucol,
 								   uchar, ulen,
 								   (uint8_t *) dest, destsize);
 
@@ -639,12 +653,14 @@ strnxfrm_prefix_icu_utf8(char *dest, size_t destsize,
 	uint32_t	state[2];
 	UErrorCode	status;
 
+	struct icu_provider *icu = (struct icu_provider *) locale->provider_data;
+
 	Assert(GetDatabaseEncoding() == PG_UTF8);
 
 	uiter_setUTF8(&iter, src, srclen);
 	state[0] = state[1] = 0;	/* won't need that again */
 	status = U_ZERO_ERROR;
-	result = ucol_nextSortKeyPart(locale->info.icu.ucol,
+	result = ucol_nextSortKeyPart(icu->ucol,
 								  &iter,
 								  state,
 								  (uint8_t *) dest,
@@ -751,11 +767,13 @@ icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale,
 	UErrorCode	status;
 	int32_t		len_dest;
 
+	struct icu_provider *icu = (struct icu_provider *) mylocale->provider_data;
+
 	len_dest = len_source;		/* try first with same length */
 	*buff_dest = palloc(len_dest * sizeof(**buff_dest));
 	status = U_ZERO_ERROR;
 	len_dest = func(*buff_dest, len_dest, buff_source, len_source,
-					mylocale->info.icu.locale, &status);
+					icu->locale, &status);
 	if (status == U_BUFFER_OVERFLOW_ERROR)
 	{
 		/* try again with adjusted length */
@@ -763,7 +781,7 @@ icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale,
 		*buff_dest = palloc(len_dest * sizeof(**buff_dest));
 		status = U_ZERO_ERROR;
 		len_dest = func(*buff_dest, len_dest, buff_source, len_source,
-						mylocale->info.icu.locale, &status);
+						icu->locale, &status);
 	}
 	if (U_FAILURE(status))
 		ereport(ERROR,
@@ -837,6 +855,8 @@ strncoll_icu(const char *arg1, ssize_t len1,
 			   *uchar2;
 	int			result;
 
+	struct icu_provider *icu = (struct icu_provider *) locale->provider_data;
+
 	/* if encoding is UTF8, use more efficient strncoll_icu_utf8 */
 #ifdef HAVE_UCOL_STRCOLLUTF8
 	Assert(GetDatabaseEncoding() != PG_UTF8);
@@ -859,7 +879,7 @@ strncoll_icu(const char *arg1, ssize_t len1,
 	ulen1 = uchar_convert(icu_converter, uchar1, ulen1 + 1, arg1, len1);
 	ulen2 = uchar_convert(icu_converter, uchar2, ulen2 + 1, arg2, len2);
 
-	result = ucol_strcoll(locale->info.icu.ucol,
+	result = ucol_strcoll(icu->ucol,
 						  uchar1, ulen1,
 						  uchar2, ulen2);
 
@@ -885,6 +905,8 @@ strnxfrm_prefix_icu(char *dest, size_t destsize,
 	size_t		uchar_bsize;
 	Size		result_bsize;
 
+	struct icu_provider *icu = (struct icu_provider *) locale->provider_data;
+
 	/* if encoding is UTF8, use more efficient strnxfrm_prefix_icu_utf8 */
 	Assert(GetDatabaseEncoding() != PG_UTF8);
 
@@ -904,7 +926,7 @@ strnxfrm_prefix_icu(char *dest, size_t destsize,
 	uiter_setString(&iter, uchar, ulen);
 	state[0] = state[1] = 0;	/* won't need that again */
 	status = U_ZERO_ERROR;
-	result_bsize = ucol_nextSortKeyPart(locale->info.icu.ucol,
+	result_bsize = ucol_nextSortKeyPart(icu->ucol,
 										&iter,
 										state,
 										(uint8_t *) dest,
diff --git a/src/backend/utils/adt/pg_locale_libc.c b/src/backend/utils/adt/pg_locale_libc.c
index e9f9fc1e369..41fd657ace6 100644
--- a/src/backend/utils/adt/pg_locale_libc.c
+++ b/src/backend/utils/adt/pg_locale_libc.c
@@ -1,3 +1,4 @@
+
 /*-----------------------------------------------------------------------
  *
  * PostgreSQL locale utilities for libc
@@ -81,6 +82,11 @@
  */
 #define		TEXTBUFLEN			1024
 
+struct libc_provider
+{
+	locale_t	lt;
+};
+
 extern pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context);
 
 static int	strncoll_libc(const char *arg1, ssize_t len1,
@@ -121,116 +127,154 @@ static size_t strupper_libc_mb(char *dest, size_t destsize,
 static bool
 wc_isdigit_libc_sb(pg_wchar wc, pg_locale_t locale)
 {
-	return isdigit_l((unsigned char) wc, locale->info.lt);
+	struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+
+	return isdigit_l((unsigned char) wc, libc->lt);
 }
 
 static bool
 wc_isalpha_libc_sb(pg_wchar wc, pg_locale_t locale)
 {
-	return isalpha_l((unsigned char) wc, locale->info.lt);
+	struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+
+	return isalpha_l((unsigned char) wc, libc->lt);
 }
 
 static bool
 wc_isalnum_libc_sb(pg_wchar wc, pg_locale_t locale)
 {
-	return isalnum_l((unsigned char) wc, locale->info.lt);
+	struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+
+	return isalnum_l((unsigned char) wc, libc->lt);
 }
 
 static bool
 wc_isupper_libc_sb(pg_wchar wc, pg_locale_t locale)
 {
-	return isupper_l((unsigned char) wc, locale->info.lt);
+	struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+
+	return isupper_l((unsigned char) wc, libc->lt);
 }
 
 static bool
 wc_islower_libc_sb(pg_wchar wc, pg_locale_t locale)
 {
-	return islower_l((unsigned char) wc, locale->info.lt);
+	struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+
+	return islower_l((unsigned char) wc, libc->lt);
 }
 
 static bool
 wc_isgraph_libc_sb(pg_wchar wc, pg_locale_t locale)
 {
-	return isgraph_l((unsigned char) wc, locale->info.lt);
+	struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+
+	return isgraph_l((unsigned char) wc, libc->lt);
 }
 
 static bool
 wc_isprint_libc_sb(pg_wchar wc, pg_locale_t locale)
 {
-	return isprint_l((unsigned char) wc, locale->info.lt);
+	struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+
+	return isprint_l((unsigned char) wc, libc->lt);
 }
 
 static bool
 wc_ispunct_libc_sb(pg_wchar wc, pg_locale_t locale)
 {
-	return ispunct_l((unsigned char) wc, locale->info.lt);
+	struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+
+	return ispunct_l((unsigned char) wc, libc->lt);
 }
 
 static bool
 wc_isspace_libc_sb(pg_wchar wc, pg_locale_t locale)
 {
-	return isspace_l((unsigned char) wc, locale->info.lt);
+	struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+
+	return isspace_l((unsigned char) wc, libc->lt);
 }
 
 static bool
 wc_isdigit_libc_mb(pg_wchar wc, pg_locale_t locale)
 {
-	return iswdigit_l((wint_t) wc, locale->info.lt);
+	struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+
+	return iswdigit_l((wint_t) wc, libc->lt);
 }
 
 static bool
 wc_isalpha_libc_mb(pg_wchar wc, pg_locale_t locale)
 {
-	return iswalpha_l((wint_t) wc, locale->info.lt);
+	struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+
+	return iswalpha_l((wint_t) wc, libc->lt);
 }
 
 static bool
 wc_isalnum_libc_mb(pg_wchar wc, pg_locale_t locale)
 {
-	return iswalnum_l((wint_t) wc, locale->info.lt);
+	struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+
+	return iswalnum_l((wint_t) wc, libc->lt);
 }
 
 static bool
 wc_isupper_libc_mb(pg_wchar wc, pg_locale_t locale)
 {
-	return iswupper_l((wint_t) wc, locale->info.lt);
+	struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+
+	return iswupper_l((wint_t) wc, libc->lt);
 }
 
 static bool
 wc_islower_libc_mb(pg_wchar wc, pg_locale_t locale)
 {
-	return iswlower_l((wint_t) wc, locale->info.lt);
+	struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+
+	return iswlower_l((wint_t) wc, libc->lt);
 }
 
 static bool
 wc_isgraph_libc_mb(pg_wchar wc, pg_locale_t locale)
 {
-	return iswgraph_l((wint_t) wc, locale->info.lt);
+	struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+
+	return iswgraph_l((wint_t) wc, libc->lt);
 }
 
 static bool
 wc_isprint_libc_mb(pg_wchar wc, pg_locale_t locale)
 {
-	return iswprint_l((wint_t) wc, locale->info.lt);
+	struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+
+	return iswprint_l((wint_t) wc, libc->lt);
 }
 
 static bool
 wc_ispunct_libc_mb(pg_wchar wc, pg_locale_t locale)
 {
-	return iswpunct_l((wint_t) wc, locale->info.lt);
+	struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+
+	return iswpunct_l((wint_t) wc, libc->lt);
 }
 
 static bool
 wc_isspace_libc_mb(pg_wchar wc, pg_locale_t locale)
 {
-	return iswspace_l((wint_t) wc, locale->info.lt);
+	struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+
+	return iswspace_l((wint_t) wc, libc->lt);
 }
 
 static char
 char_tolower_libc(unsigned char ch, pg_locale_t locale)
 {
+	struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+
 	Assert(pg_database_encoding_max_length() == 1);
-	return tolower_l(ch, locale->info.lt);
+	return tolower_l(ch, libc->lt);
 }
 
 static bool
@@ -238,22 +282,26 @@ char_is_cased_libc(char ch, pg_locale_t locale)
 {
 	bool		is_multibyte = pg_database_encoding_max_length() > 1;
 
+	struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+
 	if (is_multibyte && IS_HIGHBIT_SET(ch))
 		return true;
 	else
-		return isalpha_l((unsigned char) ch, locale->info.lt);
+		return isalpha_l((unsigned char) ch, libc->lt);
 }
 
 static pg_wchar
 toupper_libc_sb(pg_wchar wc, pg_locale_t locale)
 {
+	struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+
 	Assert(GetDatabaseEncoding() != PG_UTF8);
 
 	/* force C behavior for ASCII characters, per comments above */
 	if (locale->is_default && wc <= (pg_wchar) 127)
 		return pg_ascii_toupper((unsigned char) wc);
 	if (wc <= (pg_wchar) UCHAR_MAX)
-		return toupper_l((unsigned char) wc, locale->info.lt);
+		return toupper_l((unsigned char) wc, libc->lt);
 	else
 		return wc;
 }
@@ -261,13 +309,15 @@ toupper_libc_sb(pg_wchar wc, pg_locale_t locale)
 static pg_wchar
 toupper_libc_mb(pg_wchar wc, pg_locale_t locale)
 {
+	struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+
 	Assert(GetDatabaseEncoding() == PG_UTF8);
 
 	/* force C behavior for ASCII characters, per comments above */
 	if (locale->is_default && wc <= (pg_wchar) 127)
 		return pg_ascii_toupper((unsigned char) wc);
 	if (sizeof(wchar_t) >= 4 || wc <= (pg_wchar) 0xFFFF)
-		return towupper_l((wint_t) wc, locale->info.lt);
+		return towupper_l((wint_t) wc, libc->lt);
 	else
 		return wc;
 }
@@ -275,13 +325,15 @@ toupper_libc_mb(pg_wchar wc, pg_locale_t locale)
 static pg_wchar
 tolower_libc_sb(pg_wchar wc, pg_locale_t locale)
 {
+	struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+
 	Assert(GetDatabaseEncoding() != PG_UTF8);
 
 	/* force C behavior for ASCII characters, per comments above */
 	if (locale->is_default && wc <= (pg_wchar) 127)
 		return pg_ascii_tolower((unsigned char) wc);
 	if (wc <= (pg_wchar) UCHAR_MAX)
-		return tolower_l((unsigned char) wc, locale->info.lt);
+		return tolower_l((unsigned char) wc, libc->lt);
 	else
 		return wc;
 }
@@ -289,13 +341,15 @@ tolower_libc_sb(pg_wchar wc, pg_locale_t locale)
 static pg_wchar
 tolower_libc_mb(pg_wchar wc, pg_locale_t locale)
 {
+	struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+
 	Assert(GetDatabaseEncoding() == PG_UTF8);
 
 	/* force C behavior for ASCII characters, per comments above */
 	if (locale->is_default && wc <= (pg_wchar) 127)
 		return pg_ascii_tolower((unsigned char) wc);
 	if (sizeof(wchar_t) >= 4 || wc <= (pg_wchar) 0xFFFF)
-		return towlower_l((wint_t) wc, locale->info.lt);
+		return towlower_l((wint_t) wc, libc->lt);
 	else
 		return wc;
 }
@@ -406,7 +460,7 @@ strlower_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
 
 	if (srclen + 1 <= destsize)
 	{
-		locale_t	loc = locale->info.lt;
+		struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
 		char	   *p;
 
 		if (srclen + 1 > destsize)
@@ -427,7 +481,7 @@ strlower_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
 			if (locale->is_default)
 				*p = pg_tolower((unsigned char) *p);
 			else
-				*p = tolower_l((unsigned char) *p, loc);
+				*p = tolower_l((unsigned char) *p, libc->lt);
 		}
 	}
 
@@ -438,7 +492,8 @@ static size_t
 strlower_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
 				 pg_locale_t locale)
 {
-	locale_t	loc = locale->info.lt;
+	struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+
 	size_t		result_size;
 	wchar_t    *workspace;
 	char	   *result;
@@ -460,7 +515,7 @@ strlower_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
 	char2wchar(workspace, srclen + 1, src, srclen, locale);
 
 	for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
-		workspace[curr_char] = towlower_l(workspace[curr_char], loc);
+		workspace[curr_char] = towlower_l(workspace[curr_char], libc->lt);
 
 	/*
 	 * Make result large enough; case change might change number of bytes
@@ -491,7 +546,7 @@ strtitle_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
 
 	if (srclen + 1 <= destsize)
 	{
-		locale_t	loc = locale->info.lt;
+		struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
 		int			wasalnum = false;
 		char	   *p;
 
@@ -517,11 +572,11 @@ strtitle_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
 			else
 			{
 				if (wasalnum)
-					*p = tolower_l((unsigned char) *p, loc);
+					*p = tolower_l((unsigned char) *p, libc->lt);
 				else
-					*p = toupper_l((unsigned char) *p, loc);
+					*p = toupper_l((unsigned char) *p, libc->lt);
 			}
-			wasalnum = isalnum_l((unsigned char) *p, loc);
+			wasalnum = isalnum_l((unsigned char) *p, libc->lt);
 		}
 	}
 
@@ -532,7 +587,8 @@ static size_t
 strtitle_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
 				 pg_locale_t locale)
 {
-	locale_t	loc = locale->info.lt;
+	struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+
 	int			wasalnum = false;
 	size_t		result_size;
 	wchar_t    *workspace;
@@ -557,10 +613,10 @@ strtitle_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
 	for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
 	{
 		if (wasalnum)
-			workspace[curr_char] = towlower_l(workspace[curr_char], loc);
+			workspace[curr_char] = towlower_l(workspace[curr_char], libc->lt);
 		else
-			workspace[curr_char] = towupper_l(workspace[curr_char], loc);
-		wasalnum = iswalnum_l(workspace[curr_char], loc);
+			workspace[curr_char] = towupper_l(workspace[curr_char], libc->lt);
+		wasalnum = iswalnum_l(workspace[curr_char], libc->lt);
 	}
 
 	/*
@@ -592,7 +648,7 @@ strupper_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
 
 	if (srclen + 1 <= destsize)
 	{
-		locale_t	loc = locale->info.lt;
+		struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
 		char	   *p;
 
 		memcpy(dest, src, srclen);
@@ -610,7 +666,7 @@ strupper_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
 			if (locale->is_default)
 				*p = pg_toupper((unsigned char) *p);
 			else
-				*p = toupper_l((unsigned char) *p, loc);
+				*p = toupper_l((unsigned char) *p, libc->lt);
 		}
 	}
 
@@ -621,7 +677,8 @@ static size_t
 strupper_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
 				 pg_locale_t locale)
 {
-	locale_t	loc = locale->info.lt;
+	struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+
 	size_t		result_size;
 	wchar_t    *workspace;
 	char	   *result;
@@ -643,7 +700,7 @@ strupper_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
 	char2wchar(workspace, srclen + 1, src, srclen, locale);
 
 	for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
-		workspace[curr_char] = towupper_l(workspace[curr_char], loc);
+		workspace[curr_char] = towupper_l(workspace[curr_char], libc->lt);
 
 	/*
 	 * Make result large enough; case change might change number of bytes
@@ -671,6 +728,7 @@ create_pg_locale_libc(Oid collid, MemoryContext context)
 	const char *collate;
 	const char *ctype;
 	locale_t	loc;
+	struct libc_provider *libc;
 	pg_locale_t result;
 
 	if (collid == DEFAULT_COLLATION_OID)
@@ -709,16 +767,19 @@ create_pg_locale_libc(Oid collid, MemoryContext context)
 		ReleaseSysCache(tp);
 	}
 
-
 	loc = make_libc_collator(collate, ctype);
 
 	result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct));
+
+	libc = MemoryContextAllocZero(context, sizeof(struct libc_provider));
+	libc->lt = loc;
+	result->provider_data = (void *) libc;
+
 	result->deterministic = true;
 	result->collate_is_c = (strcmp(collate, "C") == 0) ||
 		(strcmp(collate, "POSIX") == 0);
 	result->ctype_is_c = (strcmp(ctype, "C") == 0) ||
 		(strcmp(ctype, "POSIX") == 0);
-	result->info.lt = loc;
 	if (!result->collate_is_c)
 	{
 #ifdef WIN32
@@ -832,6 +893,8 @@ strncoll_libc(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2,
 	const char *arg2n;
 	int			result;
 
+	struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+
 	if (bufsize1 + bufsize2 > TEXTBUFLEN)
 		buf = palloc(bufsize1 + bufsize2);
 
@@ -862,7 +925,7 @@ strncoll_libc(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2,
 		arg2n = buf2;
 	}
 
-	result = strcoll_l(arg1n, arg2n, locale->info.lt);
+	result = strcoll_l(arg1n, arg2n, libc->lt);
 
 	if (buf != sbuf)
 		pfree(buf);
@@ -886,8 +949,10 @@ strnxfrm_libc(char *dest, size_t destsize, const char *src, ssize_t srclen,
 	size_t		bufsize = srclen + 1;
 	size_t		result;
 
+	struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+
 	if (srclen == -1)
-		return strxfrm_l(dest, src, destsize, locale->info.lt);
+		return strxfrm_l(dest, src, destsize, libc->lt);
 
 	if (bufsize > TEXTBUFLEN)
 		buf = palloc(bufsize);
@@ -896,7 +961,7 @@ strnxfrm_libc(char *dest, size_t destsize, const char *src, ssize_t srclen,
 	memcpy(buf, src, srclen);
 	buf[srclen] = '\0';
 
-	result = strxfrm_l(dest, buf, destsize, locale->info.lt);
+	result = strxfrm_l(dest, buf, destsize, libc->lt);
 
 	if (buf != sbuf)
 		pfree(buf);
@@ -994,6 +1059,8 @@ strncoll_libc_win32_utf8(const char *arg1, ssize_t len1, const char *arg2,
 	int			r;
 	int			result;
 
+	struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+
 	Assert(GetDatabaseEncoding() == PG_UTF8);
 
 	if (len1 == -1)
@@ -1038,7 +1105,7 @@ strncoll_libc_win32_utf8(const char *arg1, ssize_t len1, const char *arg2,
 	((LPWSTR) a2p)[r] = 0;
 
 	errno = 0;
-	result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, locale->info.lt);
+	result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, libc->lt);
 	if (result == 2147483647)	/* _NLSCMPERROR; missing from mingw headers */
 		ereport(ERROR,
 				(errmsg("could not compare Unicode strings: %m")));
@@ -1167,8 +1234,10 @@ wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale)
 	}
 	else
 	{
+		struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+
 		/* Use wcstombs_l for nondefault locales */
-		result = wcstombs_l(to, from, tolen, locale->info.lt);
+		result = wcstombs_l(to, from, tolen, libc->lt);
 	}
 
 	return result;
@@ -1227,8 +1296,10 @@ char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen,
 		}
 		else
 		{
+			struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+
 			/* Use mbstowcs_l for nondefault locales */
-			result = mbstowcs_l(to, str, tolen, locale->info.lt);
+			result = mbstowcs_l(to, str, tolen, libc->lt);
 		}
 
 		pfree(str);
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index 44ff60a25b4..d258ca756ab 100644
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -151,22 +151,7 @@ struct pg_locale_struct
 	const struct collate_methods *collate;	/* NULL if collate_is_c */
 	const struct ctype_methods *ctype;	/* NULL if ctype_is_c */
 
-	union
-	{
-		struct
-		{
-			const char *locale;
-			bool		casemap_full;
-		}			builtin;
-		locale_t	lt;
-#ifdef USE_ICU
-		struct
-		{
-			const char *locale;
-			UCollator  *ucol;
-		}			icu;
-#endif
-	}			info;
+	void	   *provider_data;
 };
 
 extern void init_database_collation(void);
-- 
2.43.0

From cb441061f447d3ac2faa8cdb2f768ca42c5d8f29 Mon Sep 17 00:00:00 2001
From: Jeff Davis <j...@j-davis.com>
Date: Wed, 9 Oct 2024 10:00:58 -0700
Subject: [PATCH v16 4/4] Don't include ICU headers in pg_locale.h.

---
 src/backend/commands/collationcmds.c  | 4 ++++
 src/backend/utils/adt/formatting.c    | 4 ----
 src/backend/utils/adt/pg_locale.c     | 4 ++++
 src/backend/utils/adt/pg_locale_icu.c | 1 +
 src/backend/utils/adt/varlena.c       | 4 ++++
 src/include/utils/pg_locale.h         | 4 ----
 6 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c
index 8acbfbbeda0..a57fe93c387 100644
--- a/src/backend/commands/collationcmds.c
+++ b/src/backend/commands/collationcmds.c
@@ -14,6 +14,10 @@
  */
 #include "postgres.h"
 
+#ifdef USE_ICU
+#include <unicode/ucol.h>
+#endif
+
 #include "access/htup_details.h"
 #include "access/table.h"
 #include "access/xact.h"
diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c
index 5bd1e01f7e4..b3d5e0436ee 100644
--- a/src/backend/utils/adt/formatting.c
+++ b/src/backend/utils/adt/formatting.c
@@ -70,10 +70,6 @@
 #include <limits.h>
 #include <wctype.h>
 
-#ifdef USE_ICU
-#include <unicode/ustring.h>
-#endif
-
 #include "catalog/pg_collation.h"
 #include "catalog/pg_type.h"
 #include "common/int.h"
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 451ac4e2d9b..49bb7fc0104 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -33,6 +33,10 @@
 
 #include <time.h>
 
+#ifdef USE_ICU
+#include <unicode/ucol.h>
+#endif
+
 #include "access/htup_details.h"
 #include "catalog/pg_collation.h"
 #include "catalog/pg_database.h"
diff --git a/src/backend/utils/adt/pg_locale_icu.c b/src/backend/utils/adt/pg_locale_icu.c
index 497be95a869..d37a76a9af1 100644
--- a/src/backend/utils/adt/pg_locale_icu.c
+++ b/src/backend/utils/adt/pg_locale_icu.c
@@ -13,6 +13,7 @@
 
 #ifdef USE_ICU
 #include <unicode/ucnv.h>
+#include <unicode/ucol.h>
 #include <unicode/ustring.h>
 
 /*
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 3e4d5568bde..6fbc984c534 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -17,6 +17,10 @@
 #include <ctype.h>
 #include <limits.h>
 
+#ifdef USE_ICU
+#include <unicode/uchar.h>
+#endif
+
 #include "access/detoast.h"
 #include "access/toast_compression.h"
 #include "catalog/pg_collation.h"
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index d258ca756ab..1ff07da4cb3 100644
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -14,10 +14,6 @@
 
 #include "mb/pg_wchar.h"
 
-#ifdef USE_ICU
-#include <unicode/ucol.h>
-#endif
-
 /* use for libc locale names */
 #define LOCALE_NAME_BUFLEN 128
 
-- 
2.43.0

Reply via email to