On Mon, 2024-07-29 at 21:45 +0200, Peter Eisentraut wrote: > I have also re-reviewed the patches and I agree they are good to go.
I found a couple of issues with the later patches: * There was still some confusion about the default collation vs. datcollate/datctype for callers of wchar2char() and char2wchar() (those functions only work for libc). I introduced a new pg_locale_t structure to represent datcollate/datctype regardless of datlocprovider to solve this. * Another loose end relying on setlocale(): in selfuncs.c, there's still a call directly to strxfrm(), which depends on setlocale(). I changed this to look up the collation and then use pg_strxfrm(). That should improve histogram selectivity estimates because it uses the correct provider, rather than relying on setlocale(), right? New series attached. Regards, Jeff Davis
From 5b903c82f34f5da9cab58ecd0a2683454d6ac9ed Mon Sep 17 00:00:00 2001 From: Jeff Davis <j...@j-davis.com> Date: Wed, 5 Jun 2024 14:48:07 -0700 Subject: [PATCH v6 1/3] Make datcollate/datctype accessible as a pg_locale_t. get_db_env_locale() returns a libc locale representing the LC_COLLATE / LC_CTYPE environment, which is the same as the database default collation if and only if the datlocprovider is libc. Update callers in ts_locale.c to use get_db_env_locale() instead of NULL. Discussion: https://postgr.es/m/cfd9eb85-c52a-4ec9-a90e-a5e4de56e...@eisentraut.org Reviewed-by: Peter Eisentraut, Andreas Karlsson --- src/backend/tsearch/ts_locale.c | 37 ++++++++++++++++--------- src/backend/tsearch/wparser_def.c | 6 +++-- src/backend/utils/adt/pg_locale.c | 45 ++++++++++++++++++++++++++++--- src/backend/utils/init/postinit.c | 5 +--- src/include/utils/pg_locale.h | 5 ++-- 5 files changed, 74 insertions(+), 24 deletions(-) diff --git a/src/backend/tsearch/ts_locale.c b/src/backend/tsearch/ts_locale.c index bc44599de6a..6befd8e82d6 100644 --- a/src/backend/tsearch/ts_locale.c +++ b/src/backend/tsearch/ts_locale.c @@ -13,6 +13,7 @@ */ #include "postgres.h" +#include "catalog/pg_collation.h" #include "common/string.h" #include "storage/fd.h" #include "tsearch/ts_locale.h" @@ -36,9 +37,11 @@ t_isdigit(const char *ptr) { int clen = pg_mblen(ptr); wchar_t character[WC_BUF_LEN]; - pg_locale_t mylocale = 0; /* TODO */ - if (clen == 1 || database_ctype_is_c) + /* TODO: determine collation properly */ + pg_locale_t mylocale = get_db_env_locale(); + + if (clen == 1 || mylocale->ctype_is_c) return isdigit(TOUCHAR(ptr)); char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale); @@ -51,9 +54,11 @@ t_isspace(const char *ptr) { int clen = pg_mblen(ptr); wchar_t character[WC_BUF_LEN]; - pg_locale_t mylocale = 0; /* TODO */ - if (clen == 1 || database_ctype_is_c) + /* TODO: determine collation properly */ + pg_locale_t mylocale = get_db_env_locale(); + + if (clen == 1 || 
mylocale->ctype_is_c) return isspace(TOUCHAR(ptr)); char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale); @@ -66,9 +71,11 @@ t_isalpha(const char *ptr) { int clen = pg_mblen(ptr); wchar_t character[WC_BUF_LEN]; - pg_locale_t mylocale = 0; /* TODO */ - if (clen == 1 || database_ctype_is_c) + /* TODO: determine collation properly */ + pg_locale_t mylocale = get_db_env_locale(); + + if (clen == 1 || mylocale->ctype_is_c) return isalpha(TOUCHAR(ptr)); char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale); @@ -81,9 +88,11 @@ t_isalnum(const char *ptr) { int clen = pg_mblen(ptr); wchar_t character[WC_BUF_LEN]; - pg_locale_t mylocale = 0; /* TODO */ - if (clen == 1 || database_ctype_is_c) + /* TODO: determine collation properly */ + pg_locale_t mylocale = get_db_env_locale(); + + if (clen == 1 || mylocale->ctype_is_c) return isalnum(TOUCHAR(ptr)); char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale); @@ -96,9 +105,11 @@ t_isprint(const char *ptr) { int clen = pg_mblen(ptr); wchar_t character[WC_BUF_LEN]; - pg_locale_t mylocale = 0; /* TODO */ - if (clen == 1 || database_ctype_is_c) + /* TODO: determine collation properly */ + pg_locale_t mylocale = get_db_env_locale(); + + if (clen == 1 || mylocale->ctype_is_c) return isprint(TOUCHAR(ptr)); char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale); @@ -266,7 +277,9 @@ char * lowerstr_with_len(const char *str, int len) { char *out; - pg_locale_t mylocale = 0; /* TODO */ + + /* TODO: determine collation properly */ + pg_locale_t mylocale = get_db_env_locale(); if (len == 0) return pstrdup(""); @@ -277,7 +290,7 @@ lowerstr_with_len(const char *str, int len) * Also, for a C locale there is no need to process as multibyte. 
From * backend/utils/adt/oracle_compat.c Teodor */ - if (pg_database_encoding_max_length() > 1 && !database_ctype_is_c) + if (pg_database_encoding_max_length() > 1 && !mylocale->ctype_is_c) { wchar_t *wstr, *wptr; diff --git a/src/backend/tsearch/wparser_def.c b/src/backend/tsearch/wparser_def.c index 3919ef27b57..45caec0c4f0 100644 --- a/src/backend/tsearch/wparser_def.c +++ b/src/backend/tsearch/wparser_def.c @@ -17,6 +17,7 @@ #include <limits.h> #include <wctype.h> +#include "catalog/pg_collation.h" #include "commands/defrem.h" #include "mb/pg_wchar.h" #include "miscadmin.h" @@ -299,10 +300,11 @@ TParserInit(char *str, int len) */ if (prs->charmaxlen > 1) { - pg_locale_t mylocale = 0; /* TODO */ + /* TODO: determine collation properly */ + pg_locale_t mylocale = get_db_env_locale(); prs->usewide = true; - if (database_ctype_is_c) + if (mylocale->ctype_is_c) { /* * char2wchar doesn't work for C-locale and sizeof(pg_wchar) could diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index 627ab89d7cc..0295d834cc5 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -114,10 +114,8 @@ char *localized_full_days[7 + 1]; char *localized_abbrev_months[12 + 1]; char *localized_full_months[12 + 1]; -/* is the databases's LC_CTYPE the C locale? */ -bool database_ctype_is_c = false; - static struct pg_locale_struct default_locale; +static struct pg_locale_struct database_env_locale; /* indicates whether locale information cache is valid */ static bool CurrentLocaleConvValid = false; @@ -1471,6 +1469,42 @@ pg_locale_deterministic(pg_locale_t locale) return locale->deterministic; } +/* + * Initialize the database environment locale and store in a pg_locale_t. 
+ */ +void +init_db_env_locale(const char *datcollate, const char *datctype) +{ + Assert(database_env_locale.provider == (char) 0); + + database_env_locale.provider = COLLPROVIDER_LIBC; + database_env_locale.deterministic = true; + database_env_locale.collate_is_c = (strcmp(datcollate, "C") == 0) || + (strcmp(datcollate, "POSIX") == 0); + database_env_locale.ctype_is_c = (strcmp(datctype, "C") == 0) || + (strcmp(datctype, "POSIX") == 0); + + make_libc_collator(datcollate, datctype, &database_env_locale); +} + +/* + * Return pg_locale_t representing the database environment locale. + * + * The provider is always libc, and it represents the server environment + * LC_COLLATE and LC_CTYPE. + * + * Most callers should use pg_newlocale_from_collation(DEFAULT_COLLATION_OID) + * instead to get a pg_locale_t representing the database default collation + * (which might be any provider). Use get_db_env_locale() only if the libc + * provider is needed, such as with wchar2char()/char2wchar(). + */ +pg_locale_t +get_db_env_locale(void) +{ + Assert(database_env_locale.provider != (char) 0); + return &database_env_locale; +} + /* * Initialize default_locale with database locale settings. 
*/ @@ -1482,6 +1516,8 @@ init_database_collation(void) Datum datum; bool isnull; + Assert(default_locale.provider == (char) 0); + /* Fetch our pg_database row normally, via syscache */ tup = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId)); if (!HeapTupleIsValid(tup)) @@ -1571,7 +1607,10 @@ pg_newlocale_from_collation(Oid collid) Assert(OidIsValid(collid)); if (collid == DEFAULT_COLLATION_OID) + { + Assert(default_locale.provider != (char) 0); return &default_locale; + } cache_entry = lookup_collation_cache(collid); diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index 13524ea488a..23ac403e390 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -418,10 +418,7 @@ CheckMyDatabase(const char *name, bool am_superuser, bool override_allow_connect " which is not recognized by setlocale().", ctype), errhint("Recreate the database with another locale or install the missing locale."))); - if (strcmp(ctype, "C") == 0 || - strcmp(ctype, "POSIX") == 0) - database_ctype_is_c = true; - + init_db_env_locale(collate, ctype); init_database_collation(); /* diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h index f41d33975be..47b2942c9d8 100644 --- a/src/include/utils/pg_locale.h +++ b/src/include/utils/pg_locale.h @@ -48,9 +48,6 @@ extern PGDLLIMPORT char *localized_full_days[]; extern PGDLLIMPORT char *localized_abbrev_months[]; extern PGDLLIMPORT char *localized_full_months[]; -/* is the databases's LC_CTYPE the C locale? 
*/ -extern PGDLLIMPORT bool database_ctype_is_c; - extern bool check_locale(int category, const char *locale, char **canonname); extern char *pg_perm_setlocale(int category, const char *locale); @@ -112,6 +109,8 @@ extern void make_icu_collator(const char *iculocstr, struct pg_locale_struct *resultp); extern bool pg_locale_deterministic(pg_locale_t locale); +extern void init_db_env_locale(const char *datcollate, const char *datctype); +extern pg_locale_t get_db_env_locale(void); extern void init_database_collation(void); extern pg_locale_t pg_newlocale_from_collation(Oid collid); -- 2.34.1
From 1eea055318b07155fe025d9f6cf56dadcea040a0 Mon Sep 17 00:00:00 2001 From: Jeff Davis <j...@j-davis.com> Date: Wed, 5 Jun 2024 11:58:59 -0700 Subject: [PATCH v6 2/3] Remove support for null pg_locale_t. Previously, passing NULL for pg_locale_t meant "use the libc provider and the server environment". Now that the database collation is represented as a proper pg_locale_t (not dependent on setlocale()), remove special cases for NULL. Discussion: https://postgr.es/m/cfd9eb85-c52a-4ec9-a90e-a5e4de56e...@eisentraut.org Reviewed-by: Peter Eisentraut, Andreas Karlsson --- src/backend/access/hash/hashfunc.c | 10 +-- src/backend/regex/regc_pg_locale.c | 113 +---------------------------- src/backend/utils/adt/formatting.c | 84 ++++++--------------- src/backend/utils/adt/like.c | 10 +-- src/backend/utils/adt/pg_locale.c | 78 ++++++-------------- src/backend/utils/adt/varchar.c | 10 +-- src/backend/utils/adt/varlena.c | 28 +++---- 7 files changed, 69 insertions(+), 264 deletions(-) diff --git a/src/backend/access/hash/hashfunc.c b/src/backend/access/hash/hashfunc.c index ce8ee0ea2ef..d151751e185 100644 --- a/src/backend/access/hash/hashfunc.c +++ b/src/backend/access/hash/hashfunc.c @@ -268,7 +268,7 @@ hashtext(PG_FUNCTION_ARGS) { text *key = PG_GETARG_TEXT_PP(0); Oid collid = PG_GET_COLLATION(); - pg_locale_t mylocale = 0; + pg_locale_t mylocale; Datum result; if (!collid) @@ -277,8 +277,7 @@ hashtext(PG_FUNCTION_ARGS) errmsg("could not determine which collation to use for string hashing"), errhint("Use the COLLATE clause to set the collation explicitly."))); - if (!lc_collate_is_c(collid)) - mylocale = pg_newlocale_from_collation(collid); + mylocale = pg_newlocale_from_collation(collid); if (pg_locale_deterministic(mylocale)) { @@ -322,7 +321,7 @@ hashtextextended(PG_FUNCTION_ARGS) { text *key = PG_GETARG_TEXT_PP(0); Oid collid = PG_GET_COLLATION(); - pg_locale_t mylocale = 0; + pg_locale_t mylocale; Datum result; if (!collid) @@ -331,8 +330,7 @@ 
hashtextextended(PG_FUNCTION_ARGS) errmsg("could not determine which collation to use for string hashing"), errhint("Use the COLLATE clause to set the collation explicitly."))); - if (!lc_collate_is_c(collid)) - mylocale = pg_newlocale_from_collation(collid); + mylocale = pg_newlocale_from_collation(collid); if (pg_locale_deterministic(mylocale)) { diff --git a/src/backend/regex/regc_pg_locale.c b/src/backend/regex/regc_pg_locale.c index 9d98d10a285..947d73f3e0f 100644 --- a/src/backend/regex/regc_pg_locale.c +++ b/src/backend/regex/regc_pg_locale.c @@ -67,8 +67,6 @@ typedef enum { PG_REGEX_LOCALE_C, /* C locale (encoding independent) */ PG_REGEX_BUILTIN, /* built-in Unicode semantics */ - PG_REGEX_LOCALE_WIDE, /* Use <wctype.h> functions */ - PG_REGEX_LOCALE_1BYTE, /* Use <ctype.h> functions */ PG_REGEX_LOCALE_WIDE_L, /* Use locale_t <wctype.h> functions */ PG_REGEX_LOCALE_1BYTE_L, /* Use locale_t <ctype.h> functions */ PG_REGEX_LOCALE_ICU, /* Use ICU uchar.h functions */ @@ -261,13 +259,13 @@ pg_set_regex_collation(Oid collation) (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("nondeterministic collations are not supported for regular expressions"))); - if (pg_regex_locale && pg_regex_locale->provider == COLLPROVIDER_BUILTIN) + if (pg_regex_locale->provider == COLLPROVIDER_BUILTIN) { Assert(GetDatabaseEncoding() == PG_UTF8); pg_regex_strategy = PG_REGEX_BUILTIN; } #ifdef USE_ICU - else if (pg_regex_locale && pg_regex_locale->provider == COLLPROVIDER_ICU) + else if (pg_regex_locale->provider == COLLPROVIDER_ICU) { pg_regex_strategy = PG_REGEX_LOCALE_ICU; } @@ -275,19 +273,9 @@ pg_set_regex_collation(Oid collation) else { if (GetDatabaseEncoding() == PG_UTF8) - { - if (pg_regex_locale) - pg_regex_strategy = PG_REGEX_LOCALE_WIDE_L; - else - pg_regex_strategy = PG_REGEX_LOCALE_WIDE; - } + pg_regex_strategy = PG_REGEX_LOCALE_WIDE_L; else - { - if (pg_regex_locale) - pg_regex_strategy = PG_REGEX_LOCALE_1BYTE_L; - else - pg_regex_strategy = PG_REGEX_LOCALE_1BYTE; - } + 
pg_regex_strategy = PG_REGEX_LOCALE_1BYTE_L; } pg_regex_collation = collation; @@ -304,13 +292,6 @@ pg_wc_isdigit(pg_wchar c) (pg_char_properties[c] & PG_ISDIGIT)); case PG_REGEX_BUILTIN: return pg_u_isdigit(c, true); - case PG_REGEX_LOCALE_WIDE: - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswdigit((wint_t) c); - /* FALL THRU */ - case PG_REGEX_LOCALE_1BYTE: - return (c <= (pg_wchar) UCHAR_MAX && - isdigit((unsigned char) c)); case PG_REGEX_LOCALE_WIDE_L: if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) return iswdigit_l((wint_t) c, pg_regex_locale->info.lt); @@ -338,13 +319,6 @@ pg_wc_isalpha(pg_wchar c) (pg_char_properties[c] & PG_ISALPHA)); case PG_REGEX_BUILTIN: return pg_u_isalpha(c); - case PG_REGEX_LOCALE_WIDE: - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswalpha((wint_t) c); - /* FALL THRU */ - case PG_REGEX_LOCALE_1BYTE: - return (c <= (pg_wchar) UCHAR_MAX && - isalpha((unsigned char) c)); case PG_REGEX_LOCALE_WIDE_L: if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) return iswalpha_l((wint_t) c, pg_regex_locale->info.lt); @@ -372,13 +346,6 @@ pg_wc_isalnum(pg_wchar c) (pg_char_properties[c] & PG_ISALNUM)); case PG_REGEX_BUILTIN: return pg_u_isalnum(c, true); - case PG_REGEX_LOCALE_WIDE: - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswalnum((wint_t) c); - /* FALL THRU */ - case PG_REGEX_LOCALE_1BYTE: - return (c <= (pg_wchar) UCHAR_MAX && - isalnum((unsigned char) c)); case PG_REGEX_LOCALE_WIDE_L: if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) return iswalnum_l((wint_t) c, pg_regex_locale->info.lt); @@ -415,13 +382,6 @@ pg_wc_isupper(pg_wchar c) (pg_char_properties[c] & PG_ISUPPER)); case PG_REGEX_BUILTIN: return pg_u_isupper(c); - case PG_REGEX_LOCALE_WIDE: - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswupper((wint_t) c); - /* FALL THRU */ - case PG_REGEX_LOCALE_1BYTE: - return (c <= (pg_wchar) UCHAR_MAX && - isupper((unsigned char) c)); case PG_REGEX_LOCALE_WIDE_L: if 
(sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) return iswupper_l((wint_t) c, pg_regex_locale->info.lt); @@ -449,13 +409,6 @@ pg_wc_islower(pg_wchar c) (pg_char_properties[c] & PG_ISLOWER)); case PG_REGEX_BUILTIN: return pg_u_islower(c); - case PG_REGEX_LOCALE_WIDE: - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswlower((wint_t) c); - /* FALL THRU */ - case PG_REGEX_LOCALE_1BYTE: - return (c <= (pg_wchar) UCHAR_MAX && - islower((unsigned char) c)); case PG_REGEX_LOCALE_WIDE_L: if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) return iswlower_l((wint_t) c, pg_regex_locale->info.lt); @@ -483,13 +436,6 @@ pg_wc_isgraph(pg_wchar c) (pg_char_properties[c] & PG_ISGRAPH)); case PG_REGEX_BUILTIN: return pg_u_isgraph(c); - case PG_REGEX_LOCALE_WIDE: - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswgraph((wint_t) c); - /* FALL THRU */ - case PG_REGEX_LOCALE_1BYTE: - return (c <= (pg_wchar) UCHAR_MAX && - isgraph((unsigned char) c)); case PG_REGEX_LOCALE_WIDE_L: if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) return iswgraph_l((wint_t) c, pg_regex_locale->info.lt); @@ -517,13 +463,6 @@ pg_wc_isprint(pg_wchar c) (pg_char_properties[c] & PG_ISPRINT)); case PG_REGEX_BUILTIN: return pg_u_isprint(c); - case PG_REGEX_LOCALE_WIDE: - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswprint((wint_t) c); - /* FALL THRU */ - case PG_REGEX_LOCALE_1BYTE: - return (c <= (pg_wchar) UCHAR_MAX && - isprint((unsigned char) c)); case PG_REGEX_LOCALE_WIDE_L: if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) return iswprint_l((wint_t) c, pg_regex_locale->info.lt); @@ -551,13 +490,6 @@ pg_wc_ispunct(pg_wchar c) (pg_char_properties[c] & PG_ISPUNCT)); case PG_REGEX_BUILTIN: return pg_u_ispunct(c, true); - case PG_REGEX_LOCALE_WIDE: - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswpunct((wint_t) c); - /* FALL THRU */ - case PG_REGEX_LOCALE_1BYTE: - return (c <= (pg_wchar) UCHAR_MAX && - ispunct((unsigned char) c)); case 
PG_REGEX_LOCALE_WIDE_L: if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) return iswpunct_l((wint_t) c, pg_regex_locale->info.lt); @@ -585,13 +517,6 @@ pg_wc_isspace(pg_wchar c) (pg_char_properties[c] & PG_ISSPACE)); case PG_REGEX_BUILTIN: return pg_u_isspace(c); - case PG_REGEX_LOCALE_WIDE: - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswspace((wint_t) c); - /* FALL THRU */ - case PG_REGEX_LOCALE_1BYTE: - return (c <= (pg_wchar) UCHAR_MAX && - isspace((unsigned char) c)); case PG_REGEX_LOCALE_WIDE_L: if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) return iswspace_l((wint_t) c, pg_regex_locale->info.lt); @@ -620,20 +545,6 @@ pg_wc_toupper(pg_wchar c) return c; case PG_REGEX_BUILTIN: return unicode_uppercase_simple(c); - case PG_REGEX_LOCALE_WIDE: - /* force C behavior for ASCII characters, per comments above */ - if (c <= (pg_wchar) 127) - return pg_ascii_toupper((unsigned char) c); - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return towupper((wint_t) c); - /* FALL THRU */ - case PG_REGEX_LOCALE_1BYTE: - /* force C behavior for ASCII characters, per comments above */ - if (c <= (pg_wchar) 127) - return pg_ascii_toupper((unsigned char) c); - if (c <= (pg_wchar) UCHAR_MAX) - return toupper((unsigned char) c); - return c; case PG_REGEX_LOCALE_WIDE_L: if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) return towupper_l((wint_t) c, pg_regex_locale->info.lt); @@ -662,20 +573,6 @@ pg_wc_tolower(pg_wchar c) return c; case PG_REGEX_BUILTIN: return unicode_lowercase_simple(c); - case PG_REGEX_LOCALE_WIDE: - /* force C behavior for ASCII characters, per comments above */ - if (c <= (pg_wchar) 127) - return pg_ascii_tolower((unsigned char) c); - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return towlower((wint_t) c); - /* FALL THRU */ - case PG_REGEX_LOCALE_1BYTE: - /* force C behavior for ASCII characters, per comments above */ - if (c <= (pg_wchar) 127) - return pg_ascii_tolower((unsigned char) c); - if (c <= (pg_wchar) 
UCHAR_MAX) - return tolower((unsigned char) c); - return c; case PG_REGEX_LOCALE_WIDE_L: if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) return towlower_l((wint_t) c, pg_regex_locale->info.lt); @@ -829,11 +726,9 @@ pg_ctype_get_cache(pg_wc_probefunc probefunc, int cclasscode) case PG_REGEX_BUILTIN: max_chr = (pg_wchar) MAX_SIMPLE_CHR; break; - case PG_REGEX_LOCALE_WIDE: case PG_REGEX_LOCALE_WIDE_L: max_chr = (pg_wchar) MAX_SIMPLE_CHR; break; - case PG_REGEX_LOCALE_1BYTE: case PG_REGEX_LOCALE_1BYTE_L: #if MAX_SIMPLE_CHR >= UCHAR_MAX max_chr = (pg_wchar) UCHAR_MAX; diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c index 8736ada4be2..68069fcfd3b 100644 --- a/src/backend/utils/adt/formatting.c +++ b/src/backend/utils/adt/formatting.c @@ -1665,7 +1665,7 @@ str_tolower(const char *buff, size_t nbytes, Oid collid) mylocale = pg_newlocale_from_collation(collid); #ifdef USE_ICU - if (mylocale && mylocale->provider == COLLPROVIDER_ICU) + if (mylocale->provider == COLLPROVIDER_ICU) { int32_t len_uchar; int32_t len_conv; @@ -1681,7 +1681,7 @@ str_tolower(const char *buff, size_t nbytes, Oid collid) } else #endif - if (mylocale && mylocale->provider == COLLPROVIDER_BUILTIN) + if (mylocale->provider == COLLPROVIDER_BUILTIN) { const char *src = buff; size_t srclen = nbytes; @@ -1710,7 +1710,7 @@ str_tolower(const char *buff, size_t nbytes, Oid collid) } else { - Assert(!mylocale || mylocale->provider == COLLPROVIDER_LIBC); + Assert(mylocale->provider == COLLPROVIDER_LIBC); if (pg_database_encoding_max_length() > 1) { @@ -1730,12 +1730,7 @@ str_tolower(const char *buff, size_t nbytes, Oid collid) char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale); for (curr_char = 0; workspace[curr_char] != 0; curr_char++) - { - if (mylocale) - workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt); - else - workspace[curr_char] = towlower(workspace[curr_char]); - } + workspace[curr_char] = towlower_l(workspace[curr_char], 
mylocale->info.lt); /* * Make result large enough; case change might change number @@ -1761,12 +1756,7 @@ str_tolower(const char *buff, size_t nbytes, Oid collid) * collations you get exactly what the collation says. */ for (p = result; *p; p++) - { - if (mylocale) - *p = tolower_l((unsigned char) *p, mylocale->info.lt); - else - *p = pg_tolower((unsigned char) *p); - } + *p = tolower_l((unsigned char) *p, mylocale->info.lt); } } } @@ -1813,7 +1803,7 @@ str_toupper(const char *buff, size_t nbytes, Oid collid) mylocale = pg_newlocale_from_collation(collid); #ifdef USE_ICU - if (mylocale && mylocale->provider == COLLPROVIDER_ICU) + if (mylocale->provider == COLLPROVIDER_ICU) { int32_t len_uchar, len_conv; @@ -1829,7 +1819,7 @@ str_toupper(const char *buff, size_t nbytes, Oid collid) } else #endif - if (mylocale && mylocale->provider == COLLPROVIDER_BUILTIN) + if (mylocale->provider == COLLPROVIDER_BUILTIN) { const char *src = buff; size_t srclen = nbytes; @@ -1858,7 +1848,7 @@ str_toupper(const char *buff, size_t nbytes, Oid collid) } else { - Assert(!mylocale || mylocale->provider == COLLPROVIDER_LIBC); + Assert(mylocale->provider == COLLPROVIDER_LIBC); if (pg_database_encoding_max_length() > 1) { @@ -1878,12 +1868,7 @@ str_toupper(const char *buff, size_t nbytes, Oid collid) char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale); for (curr_char = 0; workspace[curr_char] != 0; curr_char++) - { - if (mylocale) - workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt); - else - workspace[curr_char] = towupper(workspace[curr_char]); - } + workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt); /* * Make result large enough; case change might change number @@ -1909,12 +1894,7 @@ str_toupper(const char *buff, size_t nbytes, Oid collid) * collations you get exactly what the collation says. 
*/ for (p = result; *p; p++) - { - if (mylocale) - *p = toupper_l((unsigned char) *p, mylocale->info.lt); - else - *p = pg_toupper((unsigned char) *p); - } + *p = toupper_l((unsigned char) *p, mylocale->info.lt); } } } @@ -2003,7 +1983,7 @@ str_initcap(const char *buff, size_t nbytes, Oid collid) mylocale = pg_newlocale_from_collation(collid); #ifdef USE_ICU - if (mylocale && mylocale->provider == COLLPROVIDER_ICU) + if (mylocale->provider == COLLPROVIDER_ICU) { int32_t len_uchar, len_conv; @@ -2019,7 +1999,7 @@ str_initcap(const char *buff, size_t nbytes, Oid collid) } else #endif - if (mylocale && mylocale->provider == COLLPROVIDER_BUILTIN) + if (mylocale->provider == COLLPROVIDER_BUILTIN) { const char *src = buff; size_t srclen = nbytes; @@ -2060,7 +2040,7 @@ str_initcap(const char *buff, size_t nbytes, Oid collid) } else { - Assert(!mylocale || mylocale->provider == COLLPROVIDER_LIBC); + Assert(mylocale->provider == COLLPROVIDER_LIBC); if (pg_database_encoding_max_length() > 1) { @@ -2081,22 +2061,11 @@ str_initcap(const char *buff, size_t nbytes, Oid collid) for (curr_char = 0; workspace[curr_char] != 0; curr_char++) { - if (mylocale) - { - if (wasalnum) - workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt); - else - workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt); - wasalnum = iswalnum_l(workspace[curr_char], mylocale->info.lt); - } + if (wasalnum) + workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt); else - { - if (wasalnum) - workspace[curr_char] = towlower(workspace[curr_char]); - else - workspace[curr_char] = towupper(workspace[curr_char]); - wasalnum = iswalnum(workspace[curr_char]); - } + workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt); + wasalnum = iswalnum_l(workspace[curr_char], mylocale->info.lt); } /* @@ -2124,22 +2093,11 @@ str_initcap(const char *buff, size_t nbytes, Oid collid) */ for (p = result; *p; p++) { - if (mylocale) - { - if (wasalnum) - 
*p = tolower_l((unsigned char) *p, mylocale->info.lt); - else - *p = toupper_l((unsigned char) *p, mylocale->info.lt); - wasalnum = isalnum_l((unsigned char) *p, mylocale->info.lt); - } + if (wasalnum) + *p = tolower_l((unsigned char) *p, mylocale->info.lt); else - { - if (wasalnum) - *p = pg_tolower((unsigned char) *p); - else - *p = pg_toupper((unsigned char) *p); - wasalnum = isalnum((unsigned char) *p); - } + *p = toupper_l((unsigned char) *p, mylocale->info.lt); + wasalnum = isalnum_l((unsigned char) *p, mylocale->info.lt); } } } diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c index 57ead66b5aa..0ecc96d48e5 100644 --- a/src/backend/utils/adt/like.c +++ b/src/backend/utils/adt/like.c @@ -174,8 +174,7 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation) *p; int slen, plen; - pg_locale_t locale = 0; - bool locale_is_c = false; + pg_locale_t locale; if (!OidIsValid(collation)) { @@ -189,10 +188,7 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation) errhint("Use the COLLATE clause to set the collation explicitly."))); } - if (lc_ctype_is_c(collation)) - locale_is_c = true; - else - locale = pg_newlocale_from_collation(collation); + locale = pg_newlocale_from_collation(collation); if (!pg_locale_deterministic(locale)) ereport(ERROR, @@ -228,7 +224,7 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation) plen = VARSIZE_ANY_EXHDR(pat); s = VARDATA_ANY(str); slen = VARSIZE_ANY_EXHDR(str); - return SB_IMatchText(s, slen, p, plen, locale, locale_is_c); + return SB_IMatchText(s, slen, p, plen, locale, locale->ctype_is_c); } } diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index 0295d834cc5..ccd6180a743 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -1169,7 +1169,8 @@ get_iso_localename(const char *winlocname) char *hyphen; /* Locale names use only ASCII, any conversion locale suffices. 
*/ - rc = wchar2char(iso_lc_messages, buffer, sizeof(iso_lc_messages), NULL); + rc = wchar2char(iso_lc_messages, buffer, sizeof(iso_lc_messages), + get_db_env_locale()); if (rc == -1 || rc == sizeof(iso_lc_messages)) return NULL; @@ -1462,11 +1463,7 @@ make_icu_collator(const char *iculocstr, bool pg_locale_deterministic(pg_locale_t locale) { - /* default locale must always be deterministic */ - if (locale == NULL) - return true; - else - return locale->deterministic; + return locale->deterministic; } /* @@ -1867,7 +1864,7 @@ pg_strncoll_libc_win32_utf8(const char *arg1, size_t len1, const char *arg2, int r; int result; - Assert(!locale || locale->provider == COLLPROVIDER_LIBC); + Assert(locale->provider == COLLPROVIDER_LIBC); Assert(GetDatabaseEncoding() == PG_UTF8); #ifndef WIN32 Assert(false); @@ -1907,10 +1904,7 @@ pg_strncoll_libc_win32_utf8(const char *arg1, size_t len1, const char *arg2, ((LPWSTR) a2p)[r] = 0; errno = 0; - if (locale) - result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, locale->info.lt); - else - result = wcscoll((LPWSTR) a1p, (LPWSTR) a2p); + result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, locale->info.lt); if (result == 2147483647) /* _NLSCMPERROR; missing from mingw headers */ ereport(ERROR, (errmsg("could not compare Unicode strings: %m"))); @@ -1936,7 +1930,7 @@ pg_strcoll_libc(const char *arg1, const char *arg2, pg_locale_t locale) { int result; - Assert(!locale || locale->provider == COLLPROVIDER_LIBC); + Assert(locale->provider == COLLPROVIDER_LIBC); #ifdef WIN32 if (GetDatabaseEncoding() == PG_UTF8) { @@ -1947,10 +1941,7 @@ pg_strcoll_libc(const char *arg1, const char *arg2, pg_locale_t locale) } else #endif /* WIN32 */ - if (locale) result = strcoll_l(arg1, arg2, locale->info.lt); - else - result = strcoll(arg1, arg2); return result; } @@ -1972,7 +1963,7 @@ pg_strncoll_libc(const char *arg1, size_t len1, const char *arg2, size_t len2, char *arg2n; int result; - Assert(!locale || locale->provider == COLLPROVIDER_LIBC); + 
Assert(locale->provider == COLLPROVIDER_LIBC); #ifdef WIN32 /* check for this case before doing the work for nul-termination */ @@ -2118,7 +2109,7 @@ pg_strcoll(const char *arg1, const char *arg2, pg_locale_t locale) { int result; - if (!locale || locale->provider == COLLPROVIDER_LIBC) + if (locale->provider == COLLPROVIDER_LIBC) result = pg_strcoll_libc(arg1, arg2, locale); #ifdef USE_ICU else if (locale->provider == COLLPROVIDER_ICU) @@ -2154,7 +2145,7 @@ pg_strncoll(const char *arg1, size_t len1, const char *arg2, size_t len2, { int result; - if (!locale || locale->provider == COLLPROVIDER_LIBC) + if (locale->provider == COLLPROVIDER_LIBC) result = pg_strncoll_libc(arg1, len1, arg2, len2, locale); #ifdef USE_ICU else if (locale->provider == COLLPROVIDER_ICU) @@ -2172,13 +2163,10 @@ static size_t pg_strxfrm_libc(char *dest, const char *src, size_t destsize, pg_locale_t locale) { - Assert(!locale || locale->provider == COLLPROVIDER_LIBC); + Assert(locale->provider == COLLPROVIDER_LIBC); #ifdef TRUST_STRXFRM - if (locale) - return strxfrm_l(dest, src, destsize, locale->info.lt); - else - return strxfrm(dest, src, destsize); + return strxfrm_l(dest, src, destsize, locale->info.lt); #else /* shouldn't happen */ PGLOCALE_SUPPORT_ERROR(locale->provider); @@ -2195,7 +2183,7 @@ pg_strnxfrm_libc(char *dest, const char *src, size_t srclen, size_t destsize, size_t bufsize = srclen + 1; size_t result; - Assert(!locale || locale->provider == COLLPROVIDER_LIBC); + Assert(locale->provider == COLLPROVIDER_LIBC); if (bufsize > TEXTBUFLEN) buf = palloc(bufsize); @@ -2367,7 +2355,7 @@ pg_strnxfrm_prefix_icu(char *dest, const char *src, int32_t srclen, bool pg_strxfrm_enabled(pg_locale_t locale) { - if (!locale || locale->provider == COLLPROVIDER_LIBC) + if (locale->provider == COLLPROVIDER_LIBC) #ifdef TRUST_STRXFRM return true; #else @@ -2401,7 +2389,7 @@ pg_strxfrm(char *dest, const char *src, size_t destsize, pg_locale_t locale) { size_t result = 0; /* keep compiler quiet */ - 
if (!locale || locale->provider == COLLPROVIDER_LIBC) + if (locale->provider == COLLPROVIDER_LIBC) result = pg_strxfrm_libc(dest, src, destsize, locale); #ifdef USE_ICU else if (locale->provider == COLLPROVIDER_ICU) @@ -2438,7 +2426,7 @@ pg_strnxfrm(char *dest, size_t destsize, const char *src, size_t srclen, { size_t result = 0; /* keep compiler quiet */ - if (!locale || locale->provider == COLLPROVIDER_LIBC) + if (locale->provider == COLLPROVIDER_LIBC) result = pg_strnxfrm_libc(dest, src, srclen, destsize, locale); #ifdef USE_ICU else if (locale->provider == COLLPROVIDER_ICU) @@ -2458,7 +2446,7 @@ pg_strnxfrm(char *dest, size_t destsize, const char *src, size_t srclen, bool pg_strxfrm_prefix_enabled(pg_locale_t locale) { - if (!locale || locale->provider == COLLPROVIDER_LIBC) + if (locale->provider == COLLPROVIDER_LIBC) return false; else if (locale->provider == COLLPROVIDER_ICU) return true; @@ -2488,13 +2476,11 @@ pg_strxfrm_prefix(char *dest, const char *src, size_t destsize, { size_t result = 0; /* keep compiler quiet */ - if (!locale) - PGLOCALE_SUPPORT_ERROR(COLLPROVIDER_LIBC); #ifdef USE_ICU - else if (locale->provider == COLLPROVIDER_ICU) + if (locale->provider == COLLPROVIDER_ICU) result = pg_strnxfrm_prefix_icu(dest, src, -1, destsize, locale); -#endif else +#endif PGLOCALE_SUPPORT_ERROR(locale->provider); return result; @@ -2523,13 +2509,11 @@ pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src, { size_t result = 0; /* keep compiler quiet */ - if (!locale) - PGLOCALE_SUPPORT_ERROR(COLLPROVIDER_LIBC); #ifdef USE_ICU - else if (locale->provider == COLLPROVIDER_ICU) + if (locale->provider == COLLPROVIDER_ICU) result = pg_strnxfrm_prefix_icu(dest, src, -1, destsize, locale); -#endif else +#endif PGLOCALE_SUPPORT_ERROR(locale->provider); return result; @@ -3086,7 +3070,7 @@ wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale) { size_t result; - Assert(!locale || locale->provider == COLLPROVIDER_LIBC); + 
Assert(locale->provider == COLLPROVIDER_LIBC); if (tolen == 0) return 0; @@ -3114,12 +3098,6 @@ wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale) } else #endif /* WIN32 */ - if (locale == (pg_locale_t) 0) - { - /* Use wcstombs directly for the default locale */ - result = wcstombs(to, from, tolen); - } - else { /* Use wcstombs_l for nondefault locales */ result = wcstombs_l(to, from, tolen, locale->info.lt); @@ -3143,7 +3121,7 @@ char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen, { size_t result; - Assert(!locale || locale->provider == COLLPROVIDER_LIBC); + Assert(locale->provider == COLLPROVIDER_LIBC); if (tolen == 0) return 0; @@ -3176,16 +3154,8 @@ char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen, /* mbstowcs requires ending '\0' */ char *str = pnstrdup(from, fromlen); - if (locale == (pg_locale_t) 0) - { - /* Use mbstowcs directly for the default locale */ - result = mbstowcs(to, str, tolen); - } - else - { - /* Use mbstowcs_l for nondefault locales */ - result = mbstowcs_l(to, str, tolen, locale->info.lt); - } + /* Use mbstowcs_l for nondefault locales */ + result = mbstowcs_l(to, str, tolen, locale->info.lt); pfree(str); } diff --git a/src/backend/utils/adt/varchar.c b/src/backend/utils/adt/varchar.c index 02dfe219f54..829375cd1a3 100644 --- a/src/backend/utils/adt/varchar.c +++ b/src/backend/utils/adt/varchar.c @@ -999,7 +999,7 @@ hashbpchar(PG_FUNCTION_ARGS) Oid collid = PG_GET_COLLATION(); char *keydata; int keylen; - pg_locale_t mylocale = 0; + pg_locale_t mylocale; Datum result; if (!collid) @@ -1011,8 +1011,7 @@ hashbpchar(PG_FUNCTION_ARGS) keydata = VARDATA_ANY(key); keylen = bcTruelen(key); - if (!lc_collate_is_c(collid)) - mylocale = pg_newlocale_from_collation(collid); + mylocale = pg_newlocale_from_collation(collid); if (pg_locale_deterministic(mylocale)) { @@ -1054,7 +1053,7 @@ hashbpcharextended(PG_FUNCTION_ARGS) Oid collid = PG_GET_COLLATION(); char *keydata; int keylen; - 
pg_locale_t mylocale = 0; + pg_locale_t mylocale; Datum result; if (!collid) @@ -1066,8 +1065,7 @@ hashbpcharextended(PG_FUNCTION_ARGS) keydata = VARDATA_ANY(key); keylen = bcTruelen(key); - if (!lc_collate_is_c(collid)) - mylocale = pg_newlocale_from_collation(collid); + mylocale = pg_newlocale_from_collation(collid); if (pg_locale_deterministic(mylocale)) { diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index d2e2e9bbba0..52ab8c43c66 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -1217,12 +1217,11 @@ text_position_setup(text *t1, text *t2, Oid collid, TextPositionState *state) { int len1 = VARSIZE_ANY_EXHDR(t1); int len2 = VARSIZE_ANY_EXHDR(t2); - pg_locale_t mylocale = 0; + pg_locale_t mylocale; check_collation_set(collid); - if (!lc_collate_is_c(collid)) - mylocale = pg_newlocale_from_collation(collid); + mylocale = pg_newlocale_from_collation(collid); if (!pg_locale_deterministic(mylocale)) ereport(ERROR, @@ -1619,18 +1618,14 @@ Datum texteq(PG_FUNCTION_ARGS) { Oid collid = PG_GET_COLLATION(); - bool locale_is_c = false; pg_locale_t mylocale = 0; bool result; check_collation_set(collid); - if (lc_collate_is_c(collid)) - locale_is_c = true; - else - mylocale = pg_newlocale_from_collation(collid); + mylocale = pg_newlocale_from_collation(collid); - if (locale_is_c || pg_locale_deterministic(mylocale)) + if (pg_locale_deterministic(mylocale)) { Datum arg1 = PG_GETARG_DATUM(0); Datum arg2 = PG_GETARG_DATUM(1); @@ -1678,18 +1673,14 @@ Datum textne(PG_FUNCTION_ARGS) { Oid collid = PG_GET_COLLATION(); - bool locale_is_c = false; - pg_locale_t mylocale = 0; + pg_locale_t mylocale; bool result; check_collation_set(collid); - if (lc_collate_is_c(collid)) - locale_is_c = true; - else - mylocale = pg_newlocale_from_collation(collid); + mylocale = pg_newlocale_from_collation(collid); - if (locale_is_c || pg_locale_deterministic(mylocale)) + if (pg_locale_deterministic(mylocale)) { Datum arg1 = 
PG_GETARG_DATUM(0); Datum arg2 = PG_GETARG_DATUM(1); @@ -1793,15 +1784,14 @@ text_starts_with(PG_FUNCTION_ARGS) Datum arg1 = PG_GETARG_DATUM(0); Datum arg2 = PG_GETARG_DATUM(1); Oid collid = PG_GET_COLLATION(); - pg_locale_t mylocale = 0; + pg_locale_t mylocale; bool result; Size len1, len2; check_collation_set(collid); - if (!lc_collate_is_c(collid)) - mylocale = pg_newlocale_from_collation(collid); + mylocale = pg_newlocale_from_collation(collid); if (!pg_locale_deterministic(mylocale)) ereport(ERROR, -- 2.34.1
From 9bd779fb711c902b33cfd3a5350e0736d7ceb138 Mon Sep 17 00:00:00 2001 From: Jeff Davis <j...@j-davis.com> Date: Mon, 29 Jul 2024 23:58:29 -0700 Subject: [PATCH v6 3/3] selfuncs.c: use pg_strxfrm() instead of strxfrm(). pg_strxfrm() takes a pg_locale_t, so it works properly with other providers and does not rely on setlocale(). Discussion: https://postgr.es/m/cfd9eb85-c52a-4ec9-a90e-a5e4de56e...@eisentraut.org Reviewed-by: Peter Eisentraut, Andreas Karlsson --- src/backend/utils/adt/pg_locale.c | 23 ++++++++++++++++------- src/backend/utils/adt/selfuncs.c | 9 +++++++-- 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index ccd6180a743..17c55c5ab17 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -2164,14 +2164,7 @@ pg_strxfrm_libc(char *dest, const char *src, size_t destsize, pg_locale_t locale) { Assert(locale->provider == COLLPROVIDER_LIBC); - -#ifdef TRUST_STRXFRM return strxfrm_l(dest, src, destsize, locale->info.lt); -#else - /* shouldn't happen */ - PGLOCALE_SUPPORT_ERROR(locale->provider); - return 0; /* keep compiler quiet */ -#endif } static size_t @@ -2380,6 +2373,10 @@ pg_strxfrm_enabled(pg_locale_t locale) * The provided 'src' must be nul-terminated. If 'destsize' is zero, 'dest' * may be NULL. * + * Not all providers support pg_strxfrm() safely. The caller should check + * pg_strxfrm_enabled() first, otherwise this function may return wrong + * results or an error. + * * Returns the number of bytes needed to store the transformed string, * excluding the terminating nul byte. If the value returned is 'destsize' or * greater, the resulting contents of 'dest' are undefined. @@ -2412,6 +2409,10 @@ pg_strxfrm(char *dest, const char *src, size_t destsize, pg_locale_t locale) * 'src' does not need to be nul-terminated. If 'destsize' is zero, 'dest' may * be NULL. * + * Not all providers support pg_strnxfrm() safely. 
The caller should check + * pg_strxfrm_enabled() first, otherwise this function may return wrong + * results or an error. + * * Returns the number of bytes needed to store the transformed string, * excluding the terminating nul byte. If the value returned is 'destsize' or * greater, the resulting contents of 'dest' are undefined. @@ -2466,6 +2467,10 @@ pg_strxfrm_prefix_enabled(pg_locale_t locale) * * The provided 'src' must be nul-terminated. * + * Not all providers support pg_strxfrm_prefix() safely. The caller should + * check pg_strxfrm_prefix_enabled() first, otherwise this function may return + * wrong results or an error. + * * If destsize is not large enough to hold the resulting byte sequence, stores * only the first destsize bytes in 'dest'. Returns the number of bytes * actually copied to 'dest'. @@ -2495,6 +2500,10 @@ pg_strxfrm_prefix(char *dest, const char *src, size_t destsize, * * The provided 'src' must be nul-terminated. * + * Not all providers support pg_strnxfrm_prefix() safely. The caller should + * check pg_strxfrm_prefix_enabled() first, otherwise this function may return + * wrong results or an error. + * * If destsize is not large enough to hold the resulting byte sequence, stores * only the first destsize bytes in 'dest'. Returns the number of bytes * actually copied to 'dest'. diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 877a62a62ec..673cfd9e703 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -4673,6 +4673,7 @@ convert_string_datum(Datum value, Oid typid, Oid collid, bool *failure) if (!lc_collate_is_c(collid)) { + pg_locale_t mylocale = pg_newlocale_from_collation(collid); char *xfrmstr; size_t xfrmlen; size_t xfrmlen2 PG_USED_FOR_ASSERTS_ONLY; @@ -4685,8 +4686,12 @@ convert_string_datum(Datum value, Oid typid, Oid collid, bool *failure) * bogus data or set an error. 
This is not really a problem unless it * crashes since it will only give an estimation error and nothing * fatal. + * + * XXX: we do not check pg_strxfrm_enabled(). On some platforms and in + * some cases, libc strxfrm() may return the wrong results, but that + * will only lead to an estimation error. */ - xfrmlen = strxfrm(NULL, val, 0); + xfrmlen = pg_strxfrm(NULL, val, 0, mylocale); #ifdef WIN32 /* @@ -4698,7 +4703,7 @@ convert_string_datum(Datum value, Oid typid, Oid collid, bool *failure) return val; #endif xfrmstr = (char *) palloc(xfrmlen + 1); - xfrmlen2 = strxfrm(xfrmstr, val, xfrmlen + 1); + xfrmlen2 = pg_strxfrm(xfrmstr, val, xfrmlen + 1, mylocale); /* * Some systems (e.g., glibc) can return a smaller value from the -- 2.34.1