On Mon, 2024-12-02 at 23:58 -0800, Jeff Davis wrote: > On Mon, 2024-12-02 at 16:39 +0100, Andreas Karlsson wrote: > > I feel your first patch in the series is something you can just > > commit. > > Done. > > I combined your patches and mine into the attached v10 series.
Here's v12 after committing a few of the earlier patches. I changed the ctype method table to have separate methods for isdigit, isalpha, etc., instead of the combined char_properties method. That's more consistent with how things are currently done. I may still be seeing a tiny perf regression using the same test as [1], but I don't expect it to have a practical impact. Let me know if you think that's a problem. I committed your change to move the version reporting into the provider-specific files. Your other change to lookup_collation() in namespace.c should also account for the code in DefineCollation() -- I don't think it makes sense to refactor one without the other. Regards, Jeff Davis [1] https://www.postgresql.org/message-id/78a1b434ff40510dc5aaabe986299a09f4da90cf.camel%40j-davis.com
From 129b35a2ecc7243def519e50525b0476220e17e6 Mon Sep 17 00:00:00 2001 From: Jeff Davis <j...@j-davis.com> Date: Fri, 29 Nov 2024 09:37:43 -0800 Subject: [PATCH v12 1/4] Control ctype behavior internally with a method table. Previously, pattern matching and case mapping behavior branched based on the provider. Refactor to use a method table, which is less error-prone and easier to hook. --- src/backend/regex/regc_pg_locale.c | 377 +++++----------------- src/backend/utils/adt/like.c | 22 +- src/backend/utils/adt/like_support.c | 7 +- src/backend/utils/adt/pg_locale.c | 101 +++--- src/backend/utils/adt/pg_locale_builtin.c | 106 +++++- src/backend/utils/adt/pg_locale_icu.c | 109 ++++++- src/backend/utils/adt/pg_locale_libc.c | 279 +++++++++++++--- src/include/utils/pg_locale.h | 49 +++ src/tools/pgindent/typedefs.list | 1 - 9 files changed, 618 insertions(+), 433 deletions(-) diff --git a/src/backend/regex/regc_pg_locale.c b/src/backend/regex/regc_pg_locale.c index 2360d08efae..31b8f4a9478 100644 --- a/src/backend/regex/regc_pg_locale.c +++ b/src/backend/regex/regc_pg_locale.c @@ -63,18 +63,13 @@ * NB: the coding here assumes pg_wchar is an unsigned type. */ -typedef enum -{ - PG_REGEX_STRATEGY_C, /* C locale (encoding independent) */ - PG_REGEX_STRATEGY_BUILTIN, /* built-in Unicode semantics */ - PG_REGEX_STRATEGY_LIBC_WIDE, /* Use locale_t <wctype.h> functions */ - PG_REGEX_STRATEGY_LIBC_1BYTE, /* Use locale_t <ctype.h> functions */ - PG_REGEX_STRATEGY_ICU, /* Use ICU uchar.h functions */ -} PG_Locale_Strategy; - -static PG_Locale_Strategy pg_regex_strategy; static pg_locale_t pg_regex_locale; +static struct pg_locale_struct dummy_c_locale = { + .collate_is_c = true, + .ctype_is_c = true, +}; + /* * Hard-wired character properties for C locale */ @@ -231,7 +226,6 @@ void pg_set_regex_collation(Oid collation) { pg_locale_t locale = 0; - PG_Locale_Strategy strategy; if (!OidIsValid(collation)) { @@ -252,8 +246,7 @@ pg_set_regex_collation(Oid collation) * catalog access is available, so we can't call * pg_newlocale_from_collation(). */ - strategy = PG_REGEX_STRATEGY_C; - locale = 0; + locale = &dummy_c_locale; } else { @@ -270,113 +263,41 @@ pg_set_regex_collation(Oid collation) * C/POSIX collations use this path regardless of database * encoding */ - strategy = PG_REGEX_STRATEGY_C; - locale = 0; - } - else if (locale->provider == COLLPROVIDER_BUILTIN) - { - Assert(GetDatabaseEncoding() == PG_UTF8); - strategy = PG_REGEX_STRATEGY_BUILTIN; - } -#ifdef USE_ICU - else if (locale->provider == COLLPROVIDER_ICU) - { - strategy = PG_REGEX_STRATEGY_ICU; - } -#endif - else - { - Assert(locale->provider == COLLPROVIDER_LIBC); - if (GetDatabaseEncoding() == PG_UTF8) - strategy = PG_REGEX_STRATEGY_LIBC_WIDE; - else - strategy = PG_REGEX_STRATEGY_LIBC_1BYTE; + locale = &dummy_c_locale; } } - pg_regex_strategy = strategy; pg_regex_locale = locale; } static int pg_wc_isdigit(pg_wchar c) { - switch (pg_regex_strategy) - { - case PG_REGEX_STRATEGY_C: - return (c <= (pg_wchar) 127 && - (pg_char_properties[c] & PG_ISDIGIT)); - case PG_REGEX_STRATEGY_BUILTIN: - return pg_u_isdigit(c, true); - case PG_REGEX_STRATEGY_LIBC_WIDE: - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswdigit_l((wint_t) c, pg_regex_locale->info.lt); - /* FALL THRU */ - case PG_REGEX_STRATEGY_LIBC_1BYTE: - return (c <= (pg_wchar) UCHAR_MAX && - isdigit_l((unsigned char) c, pg_regex_locale->info.lt)); - break; - case PG_REGEX_STRATEGY_ICU: -#ifdef USE_ICU - return u_isdigit(c); -#endif - break; - } - return 0; /* can't get here, but keep compiler quiet */ + if (pg_regex_locale->ctype_is_c) + return (c <= (pg_wchar) 127 && + (pg_char_properties[c] & PG_ISDIGIT)); + else + return pg_regex_locale->ctype->wc_isdigit(c, pg_regex_locale); } static int pg_wc_isalpha(pg_wchar c) { - switch (pg_regex_strategy) - { - case PG_REGEX_STRATEGY_C: - return (c <= (pg_wchar) 127 && - (pg_char_properties[c] & PG_ISALPHA)); - case PG_REGEX_STRATEGY_BUILTIN: - return pg_u_isalpha(c); - case PG_REGEX_STRATEGY_LIBC_WIDE: - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswalpha_l((wint_t) c, pg_regex_locale->info.lt); - /* FALL THRU */ - case PG_REGEX_STRATEGY_LIBC_1BYTE: - return (c <= (pg_wchar) UCHAR_MAX && - isalpha_l((unsigned char) c, pg_regex_locale->info.lt)); - break; - case PG_REGEX_STRATEGY_ICU: -#ifdef USE_ICU - return u_isalpha(c); -#endif - break; - } - return 0; /* can't get here, but keep compiler quiet */ + if (pg_regex_locale->ctype_is_c) + return (c <= (pg_wchar) 127 && + (pg_char_properties[c] & PG_ISALPHA)); + else + return pg_regex_locale->ctype->wc_isalpha(c, pg_regex_locale); } static int pg_wc_isalnum(pg_wchar c) { - switch (pg_regex_strategy) - { - case PG_REGEX_STRATEGY_C: - return (c <= (pg_wchar) 127 && - (pg_char_properties[c] & PG_ISALNUM)); - case PG_REGEX_STRATEGY_BUILTIN: - return pg_u_isalnum(c, true); - case PG_REGEX_STRATEGY_LIBC_WIDE: - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswalnum_l((wint_t) c, pg_regex_locale->info.lt); - /* FALL THRU */ - case PG_REGEX_STRATEGY_LIBC_1BYTE: - return (c <= (pg_wchar) UCHAR_MAX && - isalnum_l((unsigned char) c, pg_regex_locale->info.lt)); - break; - case PG_REGEX_STRATEGY_ICU: -#ifdef USE_ICU - return u_isalnum(c); -#endif - break; - } - return 0; /* can't get here, but keep compiler quiet */ + if (pg_regex_locale->ctype_is_c) + return (c <= (pg_wchar) 127 && + (pg_char_properties[c] & PG_ISALNUM)); + else + return pg_regex_locale->ctype->wc_isalnum(c, pg_regex_locale); } static int @@ -391,219 +312,87 @@ pg_wc_isword(pg_wchar c) static int pg_wc_isupper(pg_wchar c) { - switch (pg_regex_strategy) - { - case PG_REGEX_STRATEGY_C: - return (c <= (pg_wchar) 127 && - (pg_char_properties[c] & PG_ISUPPER)); - case PG_REGEX_STRATEGY_BUILTIN: - return pg_u_isupper(c); - case PG_REGEX_STRATEGY_LIBC_WIDE: - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswupper_l((wint_t) c, pg_regex_locale->info.lt); - /* FALL THRU */ - case PG_REGEX_STRATEGY_LIBC_1BYTE: - return (c <= (pg_wchar) UCHAR_MAX && - isupper_l((unsigned char) c, pg_regex_locale->info.lt)); - break; - case PG_REGEX_STRATEGY_ICU: -#ifdef USE_ICU - return u_isupper(c); -#endif - break; - } - return 0; /* can't get here, but keep compiler quiet */ + if (pg_regex_locale->ctype_is_c) + return (c <= (pg_wchar) 127 && + (pg_char_properties[c] & PG_ISUPPER)); + else + return pg_regex_locale->ctype->wc_isupper(c, pg_regex_locale); } static int pg_wc_islower(pg_wchar c) { - switch (pg_regex_strategy) - { - case PG_REGEX_STRATEGY_C: - return (c <= (pg_wchar) 127 && - (pg_char_properties[c] & PG_ISLOWER)); - case PG_REGEX_STRATEGY_BUILTIN: - return pg_u_islower(c); - case PG_REGEX_STRATEGY_LIBC_WIDE: - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswlower_l((wint_t) c, pg_regex_locale->info.lt); - /* FALL THRU */ - case PG_REGEX_STRATEGY_LIBC_1BYTE: - return (c <= (pg_wchar) UCHAR_MAX && - islower_l((unsigned char) c, pg_regex_locale->info.lt)); - break; - case PG_REGEX_STRATEGY_ICU: -#ifdef USE_ICU - return u_islower(c); -#endif - break; - } - return 0; /* can't get here, but keep compiler quiet */ + if (pg_regex_locale->ctype_is_c) + return (c <= (pg_wchar) 127 && + (pg_char_properties[c] & PG_ISLOWER)); + else + return pg_regex_locale->ctype->wc_islower(c, pg_regex_locale); } static int pg_wc_isgraph(pg_wchar c) { - switch (pg_regex_strategy) - { - case PG_REGEX_STRATEGY_C: - return (c <= (pg_wchar) 127 && - (pg_char_properties[c] & PG_ISGRAPH)); - case PG_REGEX_STRATEGY_BUILTIN: - return pg_u_isgraph(c); - case PG_REGEX_STRATEGY_LIBC_WIDE: - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswgraph_l((wint_t) c, pg_regex_locale->info.lt); - /* FALL THRU */ - case PG_REGEX_STRATEGY_LIBC_1BYTE: - return (c <= (pg_wchar) UCHAR_MAX && - isgraph_l((unsigned char) c, pg_regex_locale->info.lt)); - break; - case PG_REGEX_STRATEGY_ICU: -#ifdef USE_ICU - return u_isgraph(c); -#endif - break; - } - return 0; /* can't get here, but keep compiler quiet */ + if (pg_regex_locale->ctype_is_c) + return (c <= (pg_wchar) 127 && + (pg_char_properties[c] & PG_ISGRAPH)); + else + return pg_regex_locale->ctype->wc_isgraph(c, pg_regex_locale); } static int pg_wc_isprint(pg_wchar c) { - switch (pg_regex_strategy) - { - case PG_REGEX_STRATEGY_C: - return (c <= (pg_wchar) 127 && - (pg_char_properties[c] & PG_ISPRINT)); - case PG_REGEX_STRATEGY_BUILTIN: - return pg_u_isprint(c); - case PG_REGEX_STRATEGY_LIBC_WIDE: - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswprint_l((wint_t) c, pg_regex_locale->info.lt); - /* FALL THRU */ - case PG_REGEX_STRATEGY_LIBC_1BYTE: - return (c <= (pg_wchar) UCHAR_MAX && - isprint_l((unsigned char) c, pg_regex_locale->info.lt)); - break; - case PG_REGEX_STRATEGY_ICU: -#ifdef USE_ICU - return u_isprint(c); -#endif - break; - } - return 0; /* can't get here, but keep compiler quiet */ + if (pg_regex_locale->ctype_is_c) + return (c <= (pg_wchar) 127 && + (pg_char_properties[c] & PG_ISPRINT)); + else + return pg_regex_locale->ctype->wc_isprint(c, pg_regex_locale); } static int pg_wc_ispunct(pg_wchar c) { - switch (pg_regex_strategy) - { - case PG_REGEX_STRATEGY_C: - return (c <= (pg_wchar) 127 && - (pg_char_properties[c] & PG_ISPUNCT)); - case PG_REGEX_STRATEGY_BUILTIN: - return pg_u_ispunct(c, true); - case PG_REGEX_STRATEGY_LIBC_WIDE: - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswpunct_l((wint_t) c, pg_regex_locale->info.lt); - /* FALL THRU */ - case PG_REGEX_STRATEGY_LIBC_1BYTE: - return (c <= (pg_wchar) UCHAR_MAX && - ispunct_l((unsigned char) c, pg_regex_locale->info.lt)); - break; - case PG_REGEX_STRATEGY_ICU: -#ifdef USE_ICU - return u_ispunct(c); -#endif - break; - } - return 0; /* can't get here, but keep compiler quiet */ + if (pg_regex_locale->ctype_is_c) + return (c <= (pg_wchar) 127 && + (pg_char_properties[c] & PG_ISPUNCT)); + else + return pg_regex_locale->ctype->wc_ispunct(c, pg_regex_locale); } static int pg_wc_isspace(pg_wchar c) { - switch (pg_regex_strategy) - { - case PG_REGEX_STRATEGY_C: - return (c <= (pg_wchar) 127 && - (pg_char_properties[c] & PG_ISSPACE)); - case PG_REGEX_STRATEGY_BUILTIN: - return pg_u_isspace(c); - case PG_REGEX_STRATEGY_LIBC_WIDE: - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswspace_l((wint_t) c, pg_regex_locale->info.lt); - /* FALL THRU */ - case PG_REGEX_STRATEGY_LIBC_1BYTE: - return (c <= (pg_wchar) UCHAR_MAX && - isspace_l((unsigned char) c, pg_regex_locale->info.lt)); - break; - case PG_REGEX_STRATEGY_ICU: -#ifdef USE_ICU - return u_isspace(c); -#endif - break; - } - return 0; /* can't get here, but keep compiler quiet */ + if (pg_regex_locale->ctype_is_c) + return (c <= (pg_wchar) 127 && + (pg_char_properties[c] & PG_ISSPACE)); + else + return pg_regex_locale->ctype->wc_isspace(c, pg_regex_locale); } static pg_wchar pg_wc_toupper(pg_wchar c) { - switch (pg_regex_strategy) + if (pg_regex_locale->ctype_is_c) { - case PG_REGEX_STRATEGY_C: - if (c <= (pg_wchar) 127) - return pg_ascii_toupper((unsigned char) c); - return c; - case PG_REGEX_STRATEGY_BUILTIN: - return unicode_uppercase_simple(c); - case PG_REGEX_STRATEGY_LIBC_WIDE: - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return towupper_l((wint_t) c, pg_regex_locale->info.lt); - /* FALL THRU */ - case PG_REGEX_STRATEGY_LIBC_1BYTE: - if (c <= (pg_wchar) UCHAR_MAX) - return toupper_l((unsigned char) c, pg_regex_locale->info.lt); - return c; - case PG_REGEX_STRATEGY_ICU: -#ifdef USE_ICU - return u_toupper(c); -#endif - break; + if (c <= (pg_wchar) 127) + return pg_ascii_toupper((unsigned char) c); + return c; } - return 0; /* can't get here, but keep compiler quiet */ + else + return pg_regex_locale->ctype->wc_toupper(c, pg_regex_locale); } static pg_wchar pg_wc_tolower(pg_wchar c) { - switch (pg_regex_strategy) + if (pg_regex_locale->ctype_is_c) { - case PG_REGEX_STRATEGY_C: - if (c <= (pg_wchar) 127) - return pg_ascii_tolower((unsigned char) c); - return c; - case PG_REGEX_STRATEGY_BUILTIN: - return unicode_lowercase_simple(c); - case PG_REGEX_STRATEGY_LIBC_WIDE: - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return towlower_l((wint_t) c, pg_regex_locale->info.lt); - /* FALL THRU */ - case PG_REGEX_STRATEGY_LIBC_1BYTE: - if (c <= (pg_wchar) UCHAR_MAX) - return tolower_l((unsigned char) c, pg_regex_locale->info.lt); - return c; - case PG_REGEX_STRATEGY_ICU: -#ifdef USE_ICU - return u_tolower(c); -#endif - break; + if (c <= (pg_wchar) 127) + return pg_ascii_tolower((unsigned char) c); + return c; } - return 0; /* can't get here, but keep compiler quiet */ + else + return pg_regex_locale->ctype->wc_tolower(c, pg_regex_locale); } @@ -729,37 +518,25 @@ pg_ctype_get_cache(pg_wc_probefunc probefunc, int cclasscode) * would always be true for production values of MAX_SIMPLE_CHR, but it's * useful to allow it to be small for testing purposes.) */ - switch (pg_regex_strategy) + if (pg_regex_locale->ctype_is_c) { - case PG_REGEX_STRATEGY_C: #if MAX_SIMPLE_CHR >= 127 - max_chr = (pg_wchar) 127; - pcc->cv.cclasscode = -1; + max_chr = (pg_wchar) 127; + pcc->cv.cclasscode = -1; #else - max_chr = (pg_wchar) MAX_SIMPLE_CHR; + max_chr = (pg_wchar) MAX_SIMPLE_CHR; #endif - break; - case PG_REGEX_STRATEGY_BUILTIN: - max_chr = (pg_wchar) MAX_SIMPLE_CHR; - break; - case PG_REGEX_STRATEGY_LIBC_WIDE: - max_chr = (pg_wchar) MAX_SIMPLE_CHR; - break; - case PG_REGEX_STRATEGY_LIBC_1BYTE: -#if MAX_SIMPLE_CHR >= UCHAR_MAX - max_chr = (pg_wchar) UCHAR_MAX; + } + else + { + if (pg_regex_locale->ctype->max_chr != 0 && + pg_regex_locale->ctype->max_chr <= MAX_SIMPLE_CHR) + { + max_chr = pg_regex_locale->ctype->max_chr; pcc->cv.cclasscode = -1; -#else - max_chr = (pg_wchar) MAX_SIMPLE_CHR; -#endif - break; - case PG_REGEX_STRATEGY_ICU: + } + else max_chr = (pg_wchar) MAX_SIMPLE_CHR; - break; - default: - Assert(false); - max_chr = 0; /* can't get here, but keep compiler quiet */ - break; } /* diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c index 7f4cf614585..4216ac17f43 100644 --- a/src/backend/utils/adt/like.c +++ b/src/backend/utils/adt/like.c @@ -98,7 +98,7 @@ SB_lower_char(unsigned char c, pg_locale_t locale) else if (locale->is_default) return pg_tolower(c); else - return tolower_l(c, locale->info.lt); + return char_tolower(c, locale); } @@ -209,7 +209,17 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation) * way. */ - if (pg_database_encoding_max_length() > 1 || (locale->provider == COLLPROVIDER_ICU)) + if (locale->ctype_is_c || + (char_tolower_enabled(locale) && + pg_database_encoding_max_length() == 1)) + { + p = VARDATA_ANY(pat); + plen = VARSIZE_ANY_EXHDR(pat); + s = VARDATA_ANY(str); + slen = VARSIZE_ANY_EXHDR(str); + return SB_IMatchText(s, slen, p, plen, locale); + } + else { pat = DatumGetTextPP(DirectFunctionCall1Coll(lower, collation, PointerGetDatum(pat))); @@ -224,14 +234,6 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation) else return MB_MatchText(s, slen, p, plen, 0); } - else - { - p = VARDATA_ANY(pat); - plen = VARSIZE_ANY_EXHDR(pat); - s = VARDATA_ANY(str); - slen = VARSIZE_ANY_EXHDR(str); - return SB_IMatchText(s, slen, p, plen, locale); - } } /* diff --git a/src/backend/utils/adt/like_support.c b/src/backend/utils/adt/like_support.c index 8fdc677371f..999f23f86d5 100644 --- a/src/backend/utils/adt/like_support.c +++ b/src/backend/utils/adt/like_support.c @@ -1495,13 +1495,8 @@ pattern_char_isalpha(char c, bool is_multibyte, { if (locale->ctype_is_c) return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); - else if (is_multibyte && IS_HIGHBIT_SET(c)) - return true; - else if (locale->provider != COLLPROVIDER_LIBC) - return IS_HIGHBIT_SET(c) || - (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); else - return isalpha_l((unsigned char) c, locale->info.lt); + return char_is_cased(c, locale); } diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index 875cca6efc8..cdb4950ac47 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -100,27 +100,6 @@ extern pg_locale_t create_pg_locale_icu(Oid collid, MemoryContext context); extern pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context); extern char *get_collation_actual_version_libc(const char *collcollate); -extern size_t strlower_builtin(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strtitle_builtin(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strupper_builtin(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); - -extern size_t strlower_icu(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strtitle_icu(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strupper_icu(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); - -extern size_t strlower_libc(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strtitle_libc(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strupper_libc(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); - /* GUC settings */ char *locale_messages; char *locale_monetary; @@ -1232,6 +1211,9 @@ create_pg_locale(Oid collid, MemoryContext context) Assert((result->collate_is_c && result->collate == NULL) || (!result->collate_is_c && result->collate != NULL)); + Assert((result->ctype_is_c && result->ctype == NULL) || + (!result->ctype_is_c && result->ctype != NULL)); + datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collversion, &isnull); if (!isnull) @@ -1394,57 +1376,21 @@ size_t pg_strlower(char *dst, size_t dstsize, const char *src, ssize_t srclen, pg_locale_t locale) { - if (locale->provider == COLLPROVIDER_BUILTIN) - return strlower_builtin(dst, dstsize, src, srclen, locale); -#ifdef USE_ICU - else if (locale->provider == COLLPROVIDER_ICU) - return strlower_icu(dst, dstsize, src, srclen, locale); -#endif - else if (locale->provider == COLLPROVIDER_LIBC) - return strlower_libc(dst, dstsize, src, srclen, locale); - else - /* shouldn't happen */ - PGLOCALE_SUPPORT_ERROR(locale->provider); - - return 0; /* keep compiler quiet */ + return locale->ctype->strlower(dst, dstsize, src, srclen, locale); } size_t pg_strtitle(char *dst, size_t dstsize, const char *src, ssize_t srclen, pg_locale_t locale) { - if (locale->provider == COLLPROVIDER_BUILTIN) - return strtitle_builtin(dst, dstsize, src, srclen, locale); -#ifdef USE_ICU - else if (locale->provider == COLLPROVIDER_ICU) - return strtitle_icu(dst, dstsize, src, srclen, locale); -#endif - else if (locale->provider == COLLPROVIDER_LIBC) - return strtitle_libc(dst, dstsize, src, srclen, locale); - else - /* shouldn't happen */ - PGLOCALE_SUPPORT_ERROR(locale->provider); - - return 0; /* keep compiler quiet */ + return locale->ctype->strtitle(dst, dstsize, src, srclen, locale); } size_t pg_strupper(char *dst, size_t dstsize, const char *src, ssize_t srclen, pg_locale_t locale) { - if (locale->provider == COLLPROVIDER_BUILTIN) - return strupper_builtin(dst, dstsize, src, srclen, locale); -#ifdef USE_ICU - else if (locale->provider == COLLPROVIDER_ICU) - return strupper_icu(dst, dstsize, src, srclen, locale); -#endif - else if (locale->provider == COLLPROVIDER_LIBC) - return strupper_libc(dst, dstsize, src, srclen, locale); - else - /* shouldn't happen */ - PGLOCALE_SUPPORT_ERROR(locale->provider); - - return 0; /* keep compiler quiet */ + return locale->ctype->strupper(dst, dstsize, src, srclen, locale); } /* @@ -1581,6 +1527,41 @@ pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src, return locale->collate->strnxfrm_prefix(dest, destsize, src, srclen, locale); } +/* + * char_is_cased() + * + * Fuzzy test of whether the given char is case-varying or not. The argument + * is a single byte, so in a multibyte encoding, just assume any non-ASCII + * char is case-varying. + */ +bool +char_is_cased(char ch, pg_locale_t locale) +{ + return locale->ctype->char_is_cased(ch, locale); +} + +/* + * char_tolower_enabled() + * + * Does the provider support char_tolower()? + */ +bool +char_tolower_enabled(pg_locale_t locale) +{ + return (locale->ctype->char_tolower != NULL); +} + +/* + * char_tolower() + * + * Convert char (single-byte encoding) to lowercase. + */ +char +char_tolower(unsigned char ch, pg_locale_t locale) +{ + return locale->ctype->char_tolower(ch, locale); +} + /* * Return required encoding ID for the given locale, or -1 if any encoding is * valid for the locale. diff --git a/src/backend/utils/adt/pg_locale_builtin.c b/src/backend/utils/adt/pg_locale_builtin.c index 5161915e6b1..aa7d0e3d6cb 100644 --- a/src/backend/utils/adt/pg_locale_builtin.c +++ b/src/backend/utils/adt/pg_locale_builtin.c @@ -25,13 +25,6 @@ extern pg_locale_t create_pg_locale_builtin(Oid collid, MemoryContext context); extern char *get_collation_actual_version_builtin(const char *collcollate); -extern size_t strlower_builtin(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strtitle_builtin(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strupper_builtin(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); - struct WordBoundaryState { @@ -74,14 +67,14 @@ initcap_wbnext(void *state) return wbstate->len; } -size_t +static size_t strlower_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale) { return unicode_strlower(dest, destsize, src, srclen); } -size_t +static size_t strtitle_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale) { @@ -97,13 +90,104 @@ strtitle_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, initcap_wbnext, &wbstate); } -size_t +static size_t strupper_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale) { return unicode_strupper(dest, destsize, src, srclen); } +static bool +wc_isdigit_builtin(pg_wchar wc, pg_locale_t locale) +{ + return pg_u_isdigit(wc, true); +} + +static bool +wc_isalpha_builtin(pg_wchar wc, pg_locale_t locale) +{ + return pg_u_isalpha(wc); +} + +static bool +wc_isalnum_builtin(pg_wchar wc, pg_locale_t locale) +{ + return pg_u_isalnum(wc, true); +} + +static bool +wc_isupper_builtin(pg_wchar wc, pg_locale_t locale) +{ + return pg_u_isupper(wc); +} + +static bool +wc_islower_builtin(pg_wchar wc, pg_locale_t locale) +{ + return pg_u_islower(wc); +} + +static bool +wc_isgraph_builtin(pg_wchar wc, pg_locale_t locale) +{ + return pg_u_isgraph(wc); +} + +static bool +wc_isprint_builtin(pg_wchar wc, pg_locale_t locale) +{ + return pg_u_isprint(wc); +} + +static bool +wc_ispunct_builtin(pg_wchar wc, pg_locale_t locale) +{ + return pg_u_ispunct(wc, true); +} + +static bool +wc_isspace_builtin(pg_wchar wc, pg_locale_t locale) +{ + return pg_u_isspace(wc); +} + +static bool +char_is_cased_builtin(char ch, pg_locale_t locale) +{ + return IS_HIGHBIT_SET(ch) || + (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z'); +} + +static pg_wchar +wc_toupper_builtin(pg_wchar wc, pg_locale_t locale) +{ + return unicode_uppercase_simple(wc); +} + +static pg_wchar +wc_tolower_builtin(pg_wchar wc, pg_locale_t locale) +{ + return unicode_lowercase_simple(wc); +} + +static const struct ctype_methods ctype_methods_builtin = { + .strlower = strlower_builtin, + .strtitle = strtitle_builtin, + .strupper = strupper_builtin, + .wc_isdigit = wc_isdigit_builtin, + .wc_isalpha = wc_isalpha_builtin, + .wc_isalnum = wc_isalnum_builtin, + .wc_isupper = wc_isupper_builtin, + .wc_islower = wc_islower_builtin, + .wc_isgraph = wc_isgraph_builtin, + .wc_isprint = wc_isprint_builtin, + .wc_ispunct = wc_ispunct_builtin, + .wc_isspace = wc_isspace_builtin, + .char_is_cased = char_is_cased_builtin, + .wc_tolower = wc_tolower_builtin, + .wc_toupper = wc_toupper_builtin, +}; + pg_locale_t create_pg_locale_builtin(Oid collid, MemoryContext context) { @@ -146,6 +230,8 @@ create_pg_locale_builtin(Oid collid, MemoryContext context) result->deterministic = true; result->collate_is_c = true; result->ctype_is_c = (strcmp(locstr, "C") == 0); + if (!result->ctype_is_c) + result->ctype = &ctype_methods_builtin; return result; } diff --git a/src/backend/utils/adt/pg_locale_icu.c b/src/backend/utils/adt/pg_locale_icu.c index 5185b0f7289..3e9a2e0cfaa 100644 --- a/src/backend/utils/adt/pg_locale_icu.c +++ b/src/backend/utils/adt/pg_locale_icu.c @@ -48,17 +48,17 @@ #define TEXTBUFLEN 1024 extern pg_locale_t create_pg_locale_icu(Oid collid, MemoryContext context); -extern size_t strlower_icu(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strtitle_icu(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strupper_icu(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); #ifdef USE_ICU extern UCollator *pg_ucol_open(const char *loc_str); +static size_t strlower_icu(char *dst, size_t dstsize, const char *src, + ssize_t srclen, pg_locale_t locale); +static size_t strtitle_icu(char *dst, size_t dstsize, const char *src, + ssize_t srclen, pg_locale_t locale); +static size_t strupper_icu(char *dst, size_t dstsize, const char *src, + ssize_t srclen, pg_locale_t locale); static int strncoll_icu(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2, pg_locale_t locale); @@ -118,6 +118,25 @@ static int32_t u_strToTitle_default_BI(UChar *dest, int32_t destCapacity, const char *locale, UErrorCode *pErrorCode); +static bool +char_is_cased_icu(char ch, pg_locale_t locale) +{ + return IS_HIGHBIT_SET(ch) || + (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z'); +} + +static pg_wchar +toupper_icu(pg_wchar wc, pg_locale_t locale) +{ + return u_toupper(wc); +} + +static pg_wchar +tolower_icu(pg_wchar wc, pg_locale_t locale) +{ + return u_tolower(wc); +} + static const struct collate_methods collate_methods_icu = { .strncoll = strncoll_icu, .strnxfrm = strnxfrm_icu, @@ -136,6 +155,77 @@ static const struct collate_methods collate_methods_icu_utf8 = { .strxfrm_is_safe = true, }; +static bool +wc_isdigit_icu(pg_wchar wc, pg_locale_t locale) +{ + return u_isdigit(wc); +} + +static bool +wc_isalpha_icu(pg_wchar wc, pg_locale_t locale) +{ + return u_isalpha(wc); +} + +static bool +wc_isalnum_icu(pg_wchar wc, pg_locale_t locale) +{ + return u_isalnum(wc); +} + +static bool +wc_isupper_icu(pg_wchar wc, pg_locale_t locale) +{ + return u_isupper(wc); +} + +static bool +wc_islower_icu(pg_wchar wc, pg_locale_t locale) +{ + return u_islower(wc); +} + +static bool +wc_isgraph_icu(pg_wchar wc, pg_locale_t locale) +{ + return u_isgraph(wc); +} + +static bool +wc_isprint_icu(pg_wchar wc, pg_locale_t locale) +{ + return u_isprint(wc); +} + +static bool +wc_ispunct_icu(pg_wchar wc, pg_locale_t locale) +{ + return u_ispunct(wc); +} + +static bool +wc_isspace_icu(pg_wchar wc, pg_locale_t locale) +{ + return u_isspace(wc); +} + +static const struct ctype_methods ctype_methods_icu = { + .strlower = strlower_icu, + .strtitle = strtitle_icu, + .strupper = strupper_icu, + .wc_isdigit = wc_isdigit_icu, + .wc_isalpha = wc_isalpha_icu, + .wc_isalnum = wc_isalnum_icu, + .wc_isupper = wc_isupper_icu, + .wc_islower = wc_islower_icu, + .wc_isgraph = wc_isgraph_icu, + .wc_isprint = wc_isprint_icu, + .wc_ispunct = wc_ispunct_icu, + .wc_isspace = wc_isspace_icu, + .char_is_cased = char_is_cased_icu, + .wc_toupper = toupper_icu, + .wc_tolower = tolower_icu, +}; #endif pg_locale_t @@ -206,6 +296,7 @@ create_pg_locale_icu(Oid collid, MemoryContext context) result->collate = &collate_methods_icu_utf8; else result->collate = &collate_methods_icu; + result->ctype = &ctype_methods_icu; return result; #else @@ -379,7 +470,7 @@ make_icu_collator(const char *iculocstr, const char *icurules) } } -size_t +static size_t strlower_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale) { @@ -399,7 +490,7 @@ strlower_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, return result_len; } -size_t +static size_t strtitle_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale) { @@ -419,7 +510,7 @@ strtitle_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, return result_len; } -size_t +static size_t strupper_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale) { diff --git a/src/backend/utils/adt/pg_locale_libc.c b/src/backend/utils/adt/pg_locale_libc.c index 8f9a8637897..1144c6ff304 100644 --- a/src/backend/utils/adt/pg_locale_libc.c +++ b/src/backend/utils/adt/pg_locale_libc.c @@ -43,13 +43,6 @@ extern pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context); -extern size_t strlower_libc(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strtitle_libc(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strupper_libc(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); - static int strncoll_libc(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2, pg_locale_t locale); @@ -86,6 +79,239 @@ static size_t strupper_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale); +static bool +wc_isdigit_libc_sb(pg_wchar wc, pg_locale_t locale) +{ + return isdigit_l((unsigned char) wc, locale->info.lt); +} + +static bool +wc_isalpha_libc_sb(pg_wchar wc, pg_locale_t locale) +{ + return isalpha_l((unsigned char) wc, locale->info.lt); +} + +static bool +wc_isalnum_libc_sb(pg_wchar wc, pg_locale_t locale) +{ + return isalnum_l((unsigned char) wc, locale->info.lt); +} + +static bool +wc_isupper_libc_sb(pg_wchar wc, pg_locale_t locale) +{ + return isupper_l((unsigned char) wc, locale->info.lt); +} + +static bool +wc_islower_libc_sb(pg_wchar wc, pg_locale_t locale) +{ + return islower_l((unsigned char) wc, locale->info.lt); +} + +static bool +wc_isgraph_libc_sb(pg_wchar wc, pg_locale_t locale) +{ + return isgraph_l((unsigned char) wc, locale->info.lt); +} + +static bool +wc_isprint_libc_sb(pg_wchar wc, pg_locale_t locale) +{ + return isprint_l((unsigned char) wc, locale->info.lt); +} + +static bool +wc_ispunct_libc_sb(pg_wchar wc, pg_locale_t locale) +{ + return ispunct_l((unsigned char) wc, locale->info.lt); +} + +static bool +wc_isspace_libc_sb(pg_wchar wc, pg_locale_t locale) +{ + return isspace_l((unsigned char) wc, locale->info.lt); +} + +static bool +wc_isdigit_libc_mb(pg_wchar wc, pg_locale_t locale) +{ + return iswdigit_l((wint_t) wc, locale->info.lt); +} + +static bool +wc_isalpha_libc_mb(pg_wchar wc, pg_locale_t locale) +{ + return iswalpha_l((wint_t) wc, locale->info.lt); +} + +static bool +wc_isalnum_libc_mb(pg_wchar wc, pg_locale_t locale) +{ + return iswalnum_l((wint_t) wc, locale->info.lt); +} + +static bool +wc_isupper_libc_mb(pg_wchar wc, pg_locale_t locale) +{ + return iswupper_l((wint_t) wc, locale->info.lt); +} + +static bool +wc_islower_libc_mb(pg_wchar wc, pg_locale_t locale) +{ + return iswlower_l((wint_t) wc, locale->info.lt); +} + +static bool +wc_isgraph_libc_mb(pg_wchar wc, pg_locale_t locale) +{ + return iswgraph_l((wint_t) wc, locale->info.lt); +} + +static bool +wc_isprint_libc_mb(pg_wchar wc, pg_locale_t locale) +{ + return iswprint_l((wint_t) wc, locale->info.lt); +} + +static bool +wc_ispunct_libc_mb(pg_wchar wc, pg_locale_t locale) +{ + return iswpunct_l((wint_t) wc, locale->info.lt); +} + +static bool +wc_isspace_libc_mb(pg_wchar wc, pg_locale_t locale) +{ + return iswspace_l((wint_t) wc, locale->info.lt); +} + +static char +char_tolower_libc(unsigned char ch, pg_locale_t locale) +{ + Assert(pg_database_encoding_max_length() == 1); + return tolower_l(ch, locale->info.lt); +} + +static bool +char_is_cased_libc(char ch, pg_locale_t locale) +{ + bool is_multibyte = pg_database_encoding_max_length() > 1; + + if (is_multibyte && IS_HIGHBIT_SET(ch)) + return true; + else + return isalpha_l((unsigned char) ch, locale->info.lt); +} + +static pg_wchar +toupper_libc_sb(pg_wchar wc, pg_locale_t locale) +{ + Assert(GetDatabaseEncoding() != PG_UTF8); + + if (wc <= (pg_wchar) UCHAR_MAX) + return toupper_l((unsigned char) wc, locale->info.lt); + else + return wc; +} + +static pg_wchar +toupper_libc_mb(pg_wchar wc, pg_locale_t locale) +{ + Assert(GetDatabaseEncoding() == PG_UTF8); + + if (sizeof(wchar_t) >= 4 || wc <= (pg_wchar) 0xFFFF) + return towupper_l((wint_t) wc, locale->info.lt); + else + return wc; +} + +static pg_wchar +tolower_libc_sb(pg_wchar wc, pg_locale_t locale) +{ + Assert(GetDatabaseEncoding() != PG_UTF8); + + if (wc <= (pg_wchar) UCHAR_MAX) + return tolower_l((unsigned char) wc, locale->info.lt); + else + return wc; +} + +static pg_wchar +tolower_libc_mb(pg_wchar wc, pg_locale_t locale) +{ + Assert(GetDatabaseEncoding() == PG_UTF8); + + if (sizeof(wchar_t) >= 4 || wc <= (pg_wchar) 0xFFFF) + return towlower_l((wint_t) wc, locale->info.lt); + else + return wc; +} + +static const struct ctype_methods ctype_methods_libc_sb = { + .strlower = strlower_libc_sb, + .strtitle = strtitle_libc_sb, + .strupper = strupper_libc_sb, + .wc_isdigit = wc_isdigit_libc_sb, + .wc_isalpha = wc_isalpha_libc_sb, + .wc_isalnum = wc_isalnum_libc_sb, + .wc_isupper = wc_isupper_libc_sb, + .wc_islower = wc_islower_libc_sb, + .wc_isgraph = wc_isgraph_libc_sb, + .wc_isprint = wc_isprint_libc_sb, + .wc_ispunct = wc_ispunct_libc_sb, + .wc_isspace = wc_isspace_libc_sb, + .char_is_cased = char_is_cased_libc, + .char_tolower = char_tolower_libc, + .wc_toupper = toupper_libc_sb, + .wc_tolower = tolower_libc_sb, + .max_chr = UCHAR_MAX, +}; + +/* + * Non-UTF8 multibyte encodings use multibyte semantics for case mapping, but + * single-byte semantics for pattern matching. + */ +static const struct ctype_methods ctype_methods_libc_other_mb = { + .strlower = strlower_libc_mb, + .strtitle = strtitle_libc_mb, + .strupper = strupper_libc_mb, + .wc_isdigit = wc_isdigit_libc_sb, + .wc_isalpha = wc_isalpha_libc_sb, + .wc_isalnum = wc_isalnum_libc_sb, + .wc_isupper = wc_isupper_libc_sb, + .wc_islower = wc_islower_libc_sb, + .wc_isgraph = wc_isgraph_libc_sb, + .wc_isprint = wc_isprint_libc_sb, + .wc_ispunct = wc_ispunct_libc_sb, + .wc_isspace = wc_isspace_libc_sb, + .char_is_cased = char_is_cased_libc, + .char_tolower = char_tolower_libc, + .wc_toupper = toupper_libc_sb, + .wc_tolower = tolower_libc_sb, + .max_chr = UCHAR_MAX, +}; + +static const struct ctype_methods ctype_methods_libc_utf8 = { + .strlower = strlower_libc_mb, + .strtitle = strtitle_libc_mb, + .strupper = strupper_libc_mb, + .wc_isdigit = wc_isdigit_libc_mb, + .wc_isalpha = wc_isalpha_libc_mb, + .wc_isalnum = wc_isalnum_libc_mb, + .wc_isupper = wc_isupper_libc_mb, + .wc_islower = wc_islower_libc_mb, + .wc_isgraph = wc_isgraph_libc_mb, + .wc_isprint = wc_isprint_libc_mb, + .wc_ispunct = wc_ispunct_libc_mb, + .wc_isspace = wc_isspace_libc_mb, + .char_is_cased = char_is_cased_libc, + .char_tolower = char_tolower_libc, + .wc_toupper = toupper_libc_mb, + .wc_tolower = tolower_libc_mb, +}; + static const struct collate_methods collate_methods_libc = { .strncoll = strncoll_libc, .strnxfrm = strnxfrm_libc, @@ -120,36 +346,6 @@ static const struct collate_methods collate_methods_libc_win32_utf8 = { }; #endif -size_t -strlower_libc(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale) -{ - if (pg_database_encoding_max_length() > 1) - return strlower_libc_mb(dst, dstsize, src, srclen, locale); - else - return strlower_libc_sb(dst, dstsize, src, srclen, locale); -} - -size_t -strtitle_libc(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale) -{ - if (pg_database_encoding_max_length() > 1) - return strtitle_libc_mb(dst, dstsize, src, srclen, locale); - else - return strtitle_libc_sb(dst, dstsize, src, srclen, locale); -} - -size_t -strupper_libc(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale) -{ - if (pg_database_encoding_max_length() > 1) - return strupper_libc_mb(dst, dstsize, src, srclen, locale); - else - return strupper_libc_sb(dst, dstsize, src, srclen, locale); -} - static size_t strlower_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale) @@ -482,6 +678,15 @@ create_pg_locale_libc(Oid collid, MemoryContext context) #endif result->collate = &collate_methods_libc; } + if (!result->ctype_is_c) + { + if (GetDatabaseEncoding() == PG_UTF8) + result->ctype = &ctype_methods_libc_utf8; + else if (pg_database_encoding_max_length() > 1) + result->ctype = &ctype_methods_libc_other_mb; + else + result->ctype = &ctype_methods_libc_sb; + } return result; } diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h index ec42ca3da4c..b64135ab389 100644 --- a/src/include/utils/pg_locale.h +++ b/src/include/utils/pg_locale.h @@ -12,6 +12,8 @@ #ifndef _PG_LOCALE_ #define _PG_LOCALE_ +#include "mb/pg_wchar.h" + #ifdef USE_ICU #include <unicode/ucol.h> #endif @@ -77,6 +79,49 @@ struct collate_methods bool strxfrm_is_safe; }; +struct ctype_methods +{ + /* case mapping: LOWER()/INITCAP()/UPPER() */ + size_t (*strlower) (char *dest, size_t destsize, + const char *src, ssize_t srclen, + pg_locale_t locale); + size_t (*strtitle) (char *dest, size_t destsize, + const char *src, ssize_t srclen, + pg_locale_t locale); + size_t (*strupper) (char *dest, size_t destsize, + const char *src, ssize_t srclen, + pg_locale_t locale); + + /* required */ + bool (*wc_isdigit) (pg_wchar wc, pg_locale_t locale); + bool (*wc_isalpha) (pg_wchar wc, pg_locale_t locale); + bool (*wc_isalnum) (pg_wchar wc, pg_locale_t locale); + bool (*wc_isupper) (pg_wchar wc, pg_locale_t locale); + bool (*wc_islower) (pg_wchar wc, pg_locale_t locale); + bool (*wc_isgraph) (pg_wchar wc, pg_locale_t locale); + bool (*wc_isprint) (pg_wchar wc, pg_locale_t locale); + bool (*wc_ispunct) (pg_wchar wc, pg_locale_t locale); + bool (*wc_isspace) (pg_wchar wc, pg_locale_t locale); + pg_wchar (*wc_toupper) (pg_wchar wc, pg_locale_t locale); + pg_wchar (*wc_tolower) (pg_wchar wc, pg_locale_t locale); + + /* required */ + bool (*char_is_cased) (char ch, pg_locale_t locale); + + /* + * Optional. If defined, will only be called for single-byte encodings. If + * not defined, or if the encoding is multibyte, will fall back to + * pg_strlower(). + */ + char (*char_tolower) (unsigned char ch, pg_locale_t locale); + + /* + * For regex and pattern matching efficiency, the maximum char value + * supported by the above methods. If zero, limit is set by regex code. + */ + pg_wchar max_chr; +}; + /* * We use a discriminated union to hold either a locale_t or an ICU collator. * pg_locale_t is occasionally checked for truth, so make it a pointer. @@ -102,6 +147,7 @@ struct pg_locale_struct bool is_default; const struct collate_methods *collate; /* NULL if collate_is_c */ + const struct ctype_methods *ctype; /* NULL if ctype_is_c */ union { @@ -124,6 +170,9 @@ extern void init_database_collation(void); extern pg_locale_t pg_newlocale_from_collation(Oid collid); extern char *get_collation_actual_version(char collprovider, const char *collcollate); +extern bool char_is_cased(char ch, pg_locale_t locale); +extern bool char_tolower_enabled(pg_locale_t locale); +extern char char_tolower(unsigned char ch, pg_locale_t locale); extern size_t pg_strlower(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale); diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 9f83ecf181f..a869d6b7283 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -1826,7 +1826,6 @@ PGTargetServerType PGTernaryBool PGTransactionStatusType PGVerbosity -PG_Locale_Strategy PG_Lock_Status PG_init_t PGcancel -- 2.34.1
From c7f7159cdd31cc1f10ac35e43afc39b2074ecefe Mon Sep 17 00:00:00 2001 From: Jeff Davis <j...@j-davis.com> Date: Mon, 7 Oct 2024 12:51:27 -0700 Subject: [PATCH v12 2/4] Remove provider field from pg_locale_t. The behavior of pg_locale_t is entirely specified by methods, so a separate provider field is no longer necessary. --- src/backend/utils/adt/pg_locale_builtin.c | 1 - src/backend/utils/adt/pg_locale_icu.c | 11 ----------- src/backend/utils/adt/pg_locale_libc.c | 6 ------ src/include/utils/pg_locale.h | 1 - 4 files changed, 19 deletions(-) diff --git a/src/backend/utils/adt/pg_locale_builtin.c b/src/backend/utils/adt/pg_locale_builtin.c index aa7d0e3d6cb..4db21882ac3 100644 --- a/src/backend/utils/adt/pg_locale_builtin.c +++ b/src/backend/utils/adt/pg_locale_builtin.c @@ -226,7 +226,6 @@ create_pg_locale_builtin(Oid collid, MemoryContext context) result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct)); result->info.builtin.locale = MemoryContextStrdup(context, locstr); - result->provider = COLLPROVIDER_BUILTIN; result->deterministic = true; result->collate_is_c = true; result->ctype_is_c = (strcmp(locstr, "C") == 0); diff --git a/src/backend/utils/adt/pg_locale_icu.c b/src/backend/utils/adt/pg_locale_icu.c index 3e9a2e0cfaa..e4f0398c217 100644 --- a/src/backend/utils/adt/pg_locale_icu.c +++ b/src/backend/utils/adt/pg_locale_icu.c @@ -288,7 +288,6 @@ create_pg_locale_icu(Oid collid, MemoryContext context) result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct)); result->info.icu.locale = MemoryContextStrdup(context, iculocstr); result->info.icu.ucol = collator; - result->provider = COLLPROVIDER_ICU; result->deterministic = deterministic; result->collate_is_c = false; result->ctype_is_c = false; @@ -545,8 +544,6 @@ strncoll_icu_utf8(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2 int result; UErrorCode status; - Assert(locale->provider == COLLPROVIDER_ICU); - Assert(GetDatabaseEncoding() == PG_UTF8); status = U_ZERO_ERROR; @@ -574,8 +571,6 @@ strnxfrm_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, size_t uchar_bsize; Size result_bsize; - Assert(locale->provider == COLLPROVIDER_ICU); - init_icu_converter(); ulen = uchar_length(icu_converter, src, srclen); @@ -620,8 +615,6 @@ strnxfrm_prefix_icu_utf8(char *dest, size_t destsize, uint32_t state[2]; UErrorCode status; - Assert(locale->provider == COLLPROVIDER_ICU); - Assert(GetDatabaseEncoding() == PG_UTF8); uiter_setUTF8(&iter, src, srclen); @@ -788,8 +781,6 @@ strncoll_icu(const char *arg1, ssize_t len1, *uchar2; int result; - Assert(locale->provider == COLLPROVIDER_ICU); - /* if encoding is UTF8, use more efficient strncoll_icu_utf8 */ #ifdef HAVE_UCOL_STRCOLLUTF8 Assert(GetDatabaseEncoding() != PG_UTF8); @@ -838,8 +829,6 @@ strnxfrm_prefix_icu(char *dest, size_t destsize, size_t uchar_bsize; Size result_bsize; - Assert(locale->provider == COLLPROVIDER_ICU); - /* if encoding is UTF8, use more efficient strnxfrm_prefix_icu_utf8 */ Assert(GetDatabaseEncoding() != PG_UTF8); diff --git a/src/backend/utils/adt/pg_locale_libc.c b/src/backend/utils/adt/pg_locale_libc.c index 1144c6ff304..1582f8cdd2a 100644 --- a/src/backend/utils/adt/pg_locale_libc.c +++ b/src/backend/utils/adt/pg_locale_libc.c @@ -662,7 +662,6 @@ create_pg_locale_libc(Oid collid, MemoryContext context) loc = make_libc_collator(collate, ctype); result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct)); - result->provider = COLLPROVIDER_LIBC; result->deterministic = true; result->collate_is_c = (strcmp(collate, "C") == 0) || (strcmp(collate, "POSIX") == 0); @@ -782,8 +781,6 @@ strncoll_libc(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2, const char *arg2n; int result; - Assert(locale->provider == COLLPROVIDER_LIBC); - if (bufsize1 + bufsize2 > TEXTBUFLEN) buf = palloc(bufsize1 + bufsize2); @@ -838,8 +835,6 @@ strnxfrm_libc(char *dest, size_t destsize, const char *src, ssize_t srclen, size_t bufsize = srclen + 1; size_t result; - Assert(locale->provider == COLLPROVIDER_LIBC); - if (srclen == -1) return strxfrm_l(dest, src, destsize, locale->info.lt); @@ -948,7 +943,6 @@ strncoll_libc_win32_utf8(const char *arg1, ssize_t len1, const char *arg2, int r; int result; - Assert(locale->provider == COLLPROVIDER_LIBC); Assert(GetDatabaseEncoding() == PG_UTF8); if (len1 == -1) diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h index b64135ab389..d9650cec5cc 100644 --- a/src/include/utils/pg_locale.h +++ b/src/include/utils/pg_locale.h @@ -140,7 +140,6 @@ struct ctype_methods */ struct pg_locale_struct { - char provider; bool deterministic; bool collate_is_c; bool ctype_is_c; -- 2.34.1
From ad4371e6f641479275dbb21cda7d615393831271 Mon Sep 17 00:00:00 2001 From: Jeff Davis <j...@j-davis.com> Date: Mon, 7 Oct 2024 13:36:44 -0700 Subject: [PATCH v12 3/4] Make provider data in pg_locale_t an opaque pointer. --- src/backend/utils/adt/pg_locale_builtin.c | 11 +- src/backend/utils/adt/pg_locale_icu.c | 40 ++++-- src/backend/utils/adt/pg_locale_libc.c | 149 +++++++++++++++------- src/include/utils/pg_locale.h | 16 +-- 4 files changed, 143 insertions(+), 73 deletions(-) diff --git a/src/backend/utils/adt/pg_locale_builtin.c b/src/backend/utils/adt/pg_locale_builtin.c index 4db21882ac3..77768735149 100644 --- a/src/backend/utils/adt/pg_locale_builtin.c +++ b/src/backend/utils/adt/pg_locale_builtin.c @@ -26,6 +26,11 @@ extern pg_locale_t create_pg_locale_builtin(Oid collid, MemoryContext context); extern char *get_collation_actual_version_builtin(const char *collcollate); +struct builtin_provider +{ + const char *locale; +}; + struct WordBoundaryState { const char *str; @@ -192,6 +197,7 @@ pg_locale_t create_pg_locale_builtin(Oid collid, MemoryContext context) { const char *locstr; + struct builtin_provider *builtin; pg_locale_t result; if (collid == DEFAULT_COLLATION_OID) @@ -225,7 +231,10 @@ create_pg_locale_builtin(Oid collid, MemoryContext context) result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct)); - result->info.builtin.locale = MemoryContextStrdup(context, locstr); + builtin = MemoryContextAllocZero(context, sizeof(struct builtin_provider)); + builtin->locale = MemoryContextStrdup(context, locstr); + result->provider_data = (void *) builtin; + result->deterministic = true; result->collate_is_c = true; result->ctype_is_c = (strcmp(locstr, "C") == 0); diff --git a/src/backend/utils/adt/pg_locale_icu.c b/src/backend/utils/adt/pg_locale_icu.c index e4f0398c217..7bd58f26c44 100644 --- a/src/backend/utils/adt/pg_locale_icu.c +++ b/src/backend/utils/adt/pg_locale_icu.c @@ -51,6 +51,12 @@ extern pg_locale_t create_pg_locale_icu(Oid collid, MemoryContext context); #ifdef USE_ICU +struct icu_provider +{ + const char *locale; + UCollator *ucol; +}; + extern UCollator *pg_ucol_open(const char *loc_str); static size_t strlower_icu(char *dst, size_t dstsize, const char *src, @@ -235,6 +241,7 @@ create_pg_locale_icu(Oid collid, MemoryContext context) bool deterministic; const char *iculocstr; const char *icurules = NULL; + struct icu_provider *icu; UCollator *collator; pg_locale_t result; @@ -286,8 +293,12 @@ create_pg_locale_icu(Oid collid, MemoryContext context) collator = make_icu_collator(iculocstr, icurules); result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct)); - result->info.icu.locale = MemoryContextStrdup(context, iculocstr); - result->info.icu.ucol = collator; + + icu = MemoryContextAllocZero(context, sizeof(struct icu_provider)); + icu->locale = MemoryContextStrdup(context, iculocstr); + icu->ucol = collator; + result->provider_data = (void *) icu; + result->deterministic = deterministic; result->collate_is_c = false; result->ctype_is_c = false; @@ -543,11 +554,12 @@ strncoll_icu_utf8(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2 { int result; UErrorCode status; + struct icu_provider *icu = (struct icu_provider *) locale->provider_data; Assert(GetDatabaseEncoding() == PG_UTF8); status = U_ZERO_ERROR; - result = ucol_strcollUTF8(locale->info.icu.ucol, + result = ucol_strcollUTF8(icu->ucol, arg1, len1, arg2, len2, &status); @@ -571,6 +583,8 @@ strnxfrm_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, size_t uchar_bsize; Size result_bsize; + struct icu_provider *icu = (struct icu_provider *) locale->provider_data; + init_icu_converter(); ulen = uchar_length(icu_converter, src, srclen); @@ -584,7 +598,7 @@ strnxfrm_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, ulen = uchar_convert(icu_converter, uchar, ulen + 1, src, srclen); - result_bsize = ucol_getSortKey(locale->info.icu.ucol, + result_bsize = ucol_getSortKey(icu->ucol, uchar, ulen, (uint8_t *) dest, destsize); @@ -615,12 +629,14 @@ strnxfrm_prefix_icu_utf8(char *dest, size_t destsize, uint32_t state[2]; UErrorCode status; + struct icu_provider *icu = (struct icu_provider *) locale->provider_data; + Assert(GetDatabaseEncoding() == PG_UTF8); uiter_setUTF8(&iter, src, srclen); state[0] = state[1] = 0; /* won't need that again */ status = U_ZERO_ERROR; - result = ucol_nextSortKeyPart(locale->info.icu.ucol, + result = ucol_nextSortKeyPart(icu->ucol, &iter, state, (uint8_t *) dest, @@ -727,11 +743,13 @@ icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale, UErrorCode status; int32_t len_dest; + struct icu_provider *icu = (struct icu_provider *) mylocale->provider_data; + len_dest = len_source; /* try first with same length */ *buff_dest = palloc(len_dest * sizeof(**buff_dest)); status = U_ZERO_ERROR; len_dest = func(*buff_dest, len_dest, buff_source, len_source, - mylocale->info.icu.locale, &status); + icu->locale, &status); if (status == U_BUFFER_OVERFLOW_ERROR) { /* try again with adjusted length */ @@ -739,7 +757,7 @@ icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale, *buff_dest = palloc(len_dest * sizeof(**buff_dest)); status = U_ZERO_ERROR; len_dest = func(*buff_dest, len_dest, buff_source, len_source, - mylocale->info.icu.locale, &status); + icu->locale, &status); } if (U_FAILURE(status)) ereport(ERROR, @@ -781,6 +799,8 @@ strncoll_icu(const char *arg1, ssize_t len1, *uchar2; int result; + struct icu_provider *icu = (struct icu_provider *) locale->provider_data; + /* if encoding is UTF8, use more efficient strncoll_icu_utf8 */ #ifdef HAVE_UCOL_STRCOLLUTF8 Assert(GetDatabaseEncoding() != PG_UTF8); @@ -803,7 +823,7 @@ strncoll_icu(const char *arg1, ssize_t len1, ulen1 = uchar_convert(icu_converter, uchar1, ulen1 + 1, arg1, len1); ulen2 = uchar_convert(icu_converter, uchar2, ulen2 + 1, arg2, len2); - result = ucol_strcoll(locale->info.icu.ucol, + result = ucol_strcoll(icu->ucol, uchar1, ulen1, uchar2, ulen2); @@ -829,6 +849,8 @@ strnxfrm_prefix_icu(char *dest, size_t destsize, size_t uchar_bsize; Size result_bsize; + struct icu_provider *icu = (struct icu_provider *) locale->provider_data; + /* if encoding is UTF8, use more efficient strnxfrm_prefix_icu_utf8 */ Assert(GetDatabaseEncoding() != PG_UTF8); @@ -848,7 +870,7 @@ strnxfrm_prefix_icu(char *dest, size_t destsize, uiter_setString(&iter, uchar, ulen); state[0] = state[1] = 0; /* won't need that again */ status = U_ZERO_ERROR; - result_bsize = ucol_nextSortKeyPart(locale->info.icu.ucol, + result_bsize = ucol_nextSortKeyPart(icu->ucol, &iter, state, (uint8_t *) dest, diff --git a/src/backend/utils/adt/pg_locale_libc.c b/src/backend/utils/adt/pg_locale_libc.c index 1582f8cdd2a..1d990a612b4 100644 --- a/src/backend/utils/adt/pg_locale_libc.c +++ b/src/backend/utils/adt/pg_locale_libc.c @@ -1,3 +1,4 @@ + /*----------------------------------------------------------------------- * * PostgreSQL locale utilities for libc @@ -41,6 +42,11 @@ */ #define TEXTBUFLEN 1024 +struct libc_provider +{ + locale_t lt; +}; + extern pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context); static int strncoll_libc(const char *arg1, ssize_t len1, @@ -82,116 +88,136 @@ static size_t strupper_libc_mb(char *dest, size_t destsize, static bool wc_isdigit_libc_sb(pg_wchar wc, pg_locale_t locale) { - return isdigit_l((unsigned char) wc, locale->info.lt); + struct libc_provider *libc = (struct libc_provider *) locale->provider_data; + return isdigit_l((unsigned char) wc, libc->lt); } static bool wc_isalpha_libc_sb(pg_wchar wc, pg_locale_t locale) { - return isalpha_l((unsigned char) wc, locale->info.lt); + struct libc_provider *libc = (struct libc_provider *) locale->provider_data; + return isalpha_l((unsigned char) wc, libc->lt); } static bool wc_isalnum_libc_sb(pg_wchar wc, pg_locale_t locale) { - return isalnum_l((unsigned char) wc, locale->info.lt); + struct libc_provider *libc = (struct libc_provider *) locale->provider_data; + return isalnum_l((unsigned char) wc, libc->lt); } static bool wc_isupper_libc_sb(pg_wchar wc, pg_locale_t locale) { - return isupper_l((unsigned char) wc, locale->info.lt); + struct libc_provider *libc = (struct libc_provider *) locale->provider_data; + return isupper_l((unsigned char) wc, libc->lt); } static bool wc_islower_libc_sb(pg_wchar wc, pg_locale_t locale) { - return islower_l((unsigned char) wc, locale->info.lt); + struct libc_provider *libc = (struct libc_provider *) locale->provider_data; + return islower_l((unsigned char) wc, libc->lt); } static bool wc_isgraph_libc_sb(pg_wchar wc, pg_locale_t locale) { - return isgraph_l((unsigned char) wc, locale->info.lt); + struct libc_provider *libc = (struct libc_provider *) locale->provider_data; + return isgraph_l((unsigned char) wc, libc->lt); } static bool wc_isprint_libc_sb(pg_wchar wc, pg_locale_t locale) { - return isprint_l((unsigned char) wc, locale->info.lt); + struct libc_provider *libc = (struct libc_provider *) locale->provider_data; + return isprint_l((unsigned char) wc, libc->lt); } static bool wc_ispunct_libc_sb(pg_wchar wc, pg_locale_t locale) { - return ispunct_l((unsigned char) wc, locale->info.lt); + struct libc_provider *libc = (struct libc_provider *) locale->provider_data; + return ispunct_l((unsigned char) wc, libc->lt); } static bool wc_isspace_libc_sb(pg_wchar wc, pg_locale_t locale) { - return isspace_l((unsigned char) wc, locale->info.lt); + struct libc_provider *libc = (struct libc_provider *) locale->provider_data; + return isspace_l((unsigned char) wc, libc->lt); } static bool wc_isdigit_libc_mb(pg_wchar wc, pg_locale_t locale) { - return iswdigit_l((wint_t) wc, locale->info.lt); + struct libc_provider *libc = (struct libc_provider *) locale->provider_data; + return iswdigit_l((wint_t) wc, libc->lt); } static bool wc_isalpha_libc_mb(pg_wchar wc, pg_locale_t locale) { - return iswalpha_l((wint_t) wc, locale->info.lt); + struct libc_provider *libc = (struct libc_provider *) locale->provider_data; + return iswalpha_l((wint_t) wc, libc->lt); } static bool wc_isalnum_libc_mb(pg_wchar wc, pg_locale_t locale) { - return iswalnum_l((wint_t) wc, locale->info.lt); + struct libc_provider *libc = (struct libc_provider *) locale->provider_data; + return iswalnum_l((wint_t) wc, libc->lt); } static bool wc_isupper_libc_mb(pg_wchar wc, pg_locale_t locale) { - return iswupper_l((wint_t) wc, locale->info.lt); + struct libc_provider *libc = (struct libc_provider *) locale->provider_data; + return iswupper_l((wint_t) wc, libc->lt); } static bool wc_islower_libc_mb(pg_wchar wc, pg_locale_t locale) { - return iswlower_l((wint_t) wc, locale->info.lt); + struct libc_provider *libc = (struct libc_provider *) locale->provider_data; + return iswlower_l((wint_t) wc, libc->lt); } static bool wc_isgraph_libc_mb(pg_wchar wc, pg_locale_t locale) { - return iswgraph_l((wint_t) wc, locale->info.lt); + struct libc_provider *libc = (struct libc_provider *) locale->provider_data; + return iswgraph_l((wint_t) wc, libc->lt); } static bool wc_isprint_libc_mb(pg_wchar wc, pg_locale_t locale) { - return iswprint_l((wint_t) wc, locale->info.lt); + struct libc_provider *libc = (struct libc_provider *) locale->provider_data; + return iswprint_l((wint_t) wc, libc->lt); } static bool wc_ispunct_libc_mb(pg_wchar wc, pg_locale_t locale) { - return iswpunct_l((wint_t) wc, locale->info.lt); + struct libc_provider *libc = (struct libc_provider *) locale->provider_data; + return iswpunct_l((wint_t) wc, libc->lt); } static bool wc_isspace_libc_mb(pg_wchar wc, pg_locale_t locale) { - return iswspace_l((wint_t) wc, locale->info.lt); + struct libc_provider *libc = (struct libc_provider *) locale->provider_data; + return iswspace_l((wint_t) wc, libc->lt); } static char char_tolower_libc(unsigned char ch, pg_locale_t locale) { + struct libc_provider *libc = (struct libc_provider *) locale->provider_data; + Assert(pg_database_encoding_max_length() == 1); - return tolower_l(ch, locale->info.lt); + return tolower_l(ch, libc->lt); } static bool @@ -199,19 +225,23 @@ char_is_cased_libc(char ch, pg_locale_t locale) { bool is_multibyte = pg_database_encoding_max_length() > 1; + struct libc_provider *libc = (struct libc_provider *) locale->provider_data; + if (is_multibyte && IS_HIGHBIT_SET(ch)) return true; else - return isalpha_l((unsigned char) ch, locale->info.lt); + return isalpha_l((unsigned char) ch, libc->lt); } static pg_wchar toupper_libc_sb(pg_wchar wc, pg_locale_t locale) { + struct libc_provider *libc = (struct libc_provider *) locale->provider_data; + Assert(GetDatabaseEncoding() != PG_UTF8); if (wc <= (pg_wchar) UCHAR_MAX) - return toupper_l((unsigned char) wc, locale->info.lt); + return toupper_l((unsigned char) wc, libc->lt); else return wc; } @@ -219,10 +249,12 @@ toupper_libc_sb(pg_wchar wc, pg_locale_t locale) static pg_wchar toupper_libc_mb(pg_wchar wc, pg_locale_t locale) { + struct libc_provider *libc = (struct libc_provider *) locale->provider_data; + Assert(GetDatabaseEncoding() == PG_UTF8); if (sizeof(wchar_t) >= 4 || wc <= (pg_wchar) 0xFFFF) - return towupper_l((wint_t) wc, locale->info.lt); + return towupper_l((wint_t) wc, libc->lt); else return wc; } @@ -230,10 +262,12 @@ toupper_libc_mb(pg_wchar wc, pg_locale_t locale) static pg_wchar tolower_libc_sb(pg_wchar wc, pg_locale_t locale) { + struct libc_provider *libc = (struct libc_provider *) locale->provider_data; + Assert(GetDatabaseEncoding() != PG_UTF8); if (wc <= (pg_wchar) UCHAR_MAX) - return tolower_l((unsigned char) wc, locale->info.lt); + return tolower_l((unsigned char) wc, libc->lt); else return wc; } @@ -241,10 +275,12 @@ tolower_libc_sb(pg_wchar wc, pg_locale_t locale) static pg_wchar tolower_libc_mb(pg_wchar wc, pg_locale_t locale) { + struct libc_provider *libc = (struct libc_provider *) locale->provider_data; + Assert(GetDatabaseEncoding() == PG_UTF8); if (sizeof(wchar_t) >= 4 || wc <= (pg_wchar) 0xFFFF) - return towlower_l((wint_t) wc, locale->info.lt); + return towlower_l((wint_t) wc, libc->lt); else return wc; } @@ -355,7 +391,7 @@ strlower_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen, if (srclen + 1 <= destsize) { - locale_t loc = locale->info.lt; + struct libc_provider *libc = (struct libc_provider *) locale->provider_data; char *p; if (srclen + 1 > destsize) @@ -376,7 +412,7 @@ strlower_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen, if (locale->is_default) *p = pg_tolower((unsigned char) *p); else - *p = tolower_l((unsigned char) *p, loc); + *p = tolower_l((unsigned char) *p, libc->lt); } } @@ -387,7 +423,8 @@ static size_t strlower_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale) { - locale_t loc = locale->info.lt; + struct libc_provider *libc = (struct libc_provider *) locale->provider_data; + size_t result_size; wchar_t *workspace; char *result; @@ -409,7 +446,7 @@ strlower_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen, char2wchar(workspace, srclen + 1, src, srclen, locale); for (curr_char = 0; workspace[curr_char] != 0; curr_char++) - workspace[curr_char] = towlower_l(workspace[curr_char], loc); + workspace[curr_char] = towlower_l(workspace[curr_char], libc->lt); /* * Make result large enough; case change might change number of bytes @@ -440,7 +477,7 @@ strtitle_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen, if (srclen + 1 <= destsize) { - locale_t loc = locale->info.lt; + struct libc_provider *libc = (struct libc_provider *) locale->provider_data; int wasalnum = false; char *p; @@ -466,11 +503,11 @@ strtitle_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen, else { if (wasalnum) - *p = tolower_l((unsigned char) *p, loc); + *p = tolower_l((unsigned char) *p, libc->lt); else - *p = toupper_l((unsigned char) *p, loc); + *p = toupper_l((unsigned char) *p, libc->lt); } - wasalnum = isalnum_l((unsigned char) *p, loc); + wasalnum = isalnum_l((unsigned char) *p, libc->lt); } } @@ -481,7 +518,8 @@ static size_t strtitle_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale) { - locale_t loc = locale->info.lt; + struct libc_provider *libc = (struct libc_provider *) locale->provider_data; + int wasalnum = false; size_t result_size; wchar_t *workspace; @@ -506,10 +544,10 @@ strtitle_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen, for (curr_char = 0; workspace[curr_char] != 0; curr_char++) { if (wasalnum) - workspace[curr_char] = towlower_l(workspace[curr_char], loc); + workspace[curr_char] = towlower_l(workspace[curr_char], libc->lt); else - workspace[curr_char] = towupper_l(workspace[curr_char], loc); - wasalnum = iswalnum_l(workspace[curr_char], loc); + workspace[curr_char] = towupper_l(workspace[curr_char], libc->lt); + wasalnum = iswalnum_l(workspace[curr_char], libc->lt); } /* @@ -541,7 +579,7 @@ strupper_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen, if (srclen + 1 <= destsize) { - locale_t loc = locale->info.lt; + struct libc_provider *libc = (struct libc_provider *) locale->provider_data; char *p; memcpy(dest, src, srclen); @@ -559,7 +597,7 @@ strupper_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen, if (locale->is_default) *p = pg_toupper((unsigned char) *p); else - *p = toupper_l((unsigned char) *p, loc); + *p = toupper_l((unsigned char) *p, libc->lt); } } @@ -570,7 +608,8 @@ static size_t strupper_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale) { - locale_t loc = locale->info.lt; + struct libc_provider *libc = (struct libc_provider *) locale->provider_data; + size_t result_size; wchar_t *workspace; char *result; @@ -592,7 +631,7 @@ strupper_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen, char2wchar(workspace, srclen + 1, src, srclen, locale); for (curr_char = 0; workspace[curr_char] != 0; curr_char++) - workspace[curr_char] = towupper_l(workspace[curr_char], loc); + workspace[curr_char] = towupper_l(workspace[curr_char], libc->lt); /* * Make result large enough; case change might change number of bytes @@ -620,6 +659,7 @@ create_pg_locale_libc(Oid collid, MemoryContext context) const char *collate; const char *ctype; locale_t loc; + struct libc_provider *libc; pg_locale_t result; if (collid == DEFAULT_COLLATION_OID) @@ -658,16 +698,19 @@ create_pg_locale_libc(Oid collid, MemoryContext context) ReleaseSysCache(tp); } - loc = make_libc_collator(collate, ctype); result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct)); + + libc = MemoryContextAllocZero(context, sizeof(struct libc_provider)); + libc->lt = loc; + result->provider_data = (void *) libc; + result->deterministic = true; result->collate_is_c = (strcmp(collate, "C") == 0) || (strcmp(collate, "POSIX") == 0); result->ctype_is_c = (strcmp(ctype, "C") == 0) || (strcmp(ctype, "POSIX") == 0); - result->info.lt = loc; if (!result->collate_is_c) { #ifdef WIN32 @@ -781,6 +824,8 @@ strncoll_libc(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2, const char *arg2n; int result; + struct libc_provider *libc = (struct libc_provider *) locale->provider_data; + if (bufsize1 + bufsize2 > TEXTBUFLEN) buf = palloc(bufsize1 + bufsize2); @@ -811,7 +856,7 @@ strncoll_libc(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2, arg2n = buf2; } - result = strcoll_l(arg1n, arg2n, locale->info.lt); + result = strcoll_l(arg1n, arg2n, libc->lt); if (buf != sbuf) pfree(buf); @@ -835,8 +880,10 @@ strnxfrm_libc(char *dest, size_t destsize, const char *src, ssize_t srclen, size_t bufsize = srclen + 1; size_t result; + struct libc_provider *libc = (struct libc_provider *) locale->provider_data; + if (srclen == -1) - return strxfrm_l(dest, src, destsize, locale->info.lt); + return strxfrm_l(dest, src, destsize, libc->lt); if (bufsize > TEXTBUFLEN) buf = palloc(bufsize); @@ -845,7 +892,7 @@ strnxfrm_libc(char *dest, size_t destsize, const char *src, ssize_t srclen, memcpy(buf, src, srclen); buf[srclen] = '\0'; - result = strxfrm_l(dest, buf, destsize, locale->info.lt); + result = strxfrm_l(dest, buf, destsize, libc->lt); if (buf != sbuf) pfree(buf); @@ -943,6 +990,8 @@ strncoll_libc_win32_utf8(const char *arg1, ssize_t len1, const char *arg2, int r; int result; + struct libc_provider *libc = (struct libc_provider *) locale->provider_data; + Assert(GetDatabaseEncoding() == PG_UTF8); if (len1 == -1) @@ -987,7 +1036,7 @@ strncoll_libc_win32_utf8(const char *arg1, ssize_t len1, const char *arg2, ((LPWSTR) a2p)[r] = 0; errno = 0; - result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, locale->info.lt); + result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, libc->lt); if (result == 2147483647) /* _NLSCMPERROR; missing from mingw headers */ ereport(ERROR, (errmsg("could not compare Unicode strings: %m"))); @@ -1116,8 +1165,10 @@ wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale) } else { + struct libc_provider *libc = (struct libc_provider *) locale->provider_data; + /* Use wcstombs_l for nondefault locales */ - result = wcstombs_l(to, from, tolen, locale->info.lt); + result = wcstombs_l(to, from, tolen, libc->lt); } return result; @@ -1176,8 +1227,10 @@ char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen, } else { + struct libc_provider *libc = (struct libc_provider *) locale->provider_data; + /* Use mbstowcs_l for nondefault locales */ - result = mbstowcs_l(to, str, tolen, locale->info.lt); + result = mbstowcs_l(to, str, tolen, libc->lt); } pfree(str); diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h index d9650cec5cc..74dd8435a6b 100644 --- a/src/include/utils/pg_locale.h +++ b/src/include/utils/pg_locale.h @@ -148,21 +148,7 @@ struct pg_locale_struct const struct collate_methods *collate; /* NULL if collate_is_c */ const struct ctype_methods *ctype; /* NULL if ctype_is_c */ - union - { - struct - { - const char *locale; - } builtin; - locale_t lt; -#ifdef USE_ICU - struct - { - const char *locale; - UCollator *ucol; - } icu; -#endif - } info; + void *provider_data; }; extern void init_database_collation(void); -- 2.34.1
From 95c70ad9d8a2f90967a5d62b276d96756dfae172 Mon Sep 17 00:00:00 2001 From: Jeff Davis <j...@j-davis.com> Date: Wed, 9 Oct 2024 10:00:58 -0700 Subject: [PATCH v12 4/4] Don't include ICU headers in pg_locale.h. --- src/backend/commands/collationcmds.c | 4 ++++ src/backend/utils/adt/formatting.c | 4 ---- src/backend/utils/adt/pg_locale.c | 4 ++++ src/backend/utils/adt/pg_locale_icu.c | 1 + src/backend/utils/adt/varlena.c | 4 ++++ src/include/utils/pg_locale.h | 4 ---- 6 files changed, 13 insertions(+), 8 deletions(-) diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c index 8acbfbbeda0..a57fe93c387 100644 --- a/src/backend/commands/collationcmds.c +++ b/src/backend/commands/collationcmds.c @@ -14,6 +14,10 @@ */ #include "postgres.h" +#ifdef USE_ICU +#include <unicode/ucol.h> +#endif + #include "access/htup_details.h" #include "access/table.h" #include "access/xact.h" diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c index 3960235e14e..2ba4ca7f0f2 100644 --- a/src/backend/utils/adt/formatting.c +++ b/src/backend/utils/adt/formatting.c @@ -71,10 +71,6 @@ #include <limits.h> #include <wctype.h> -#ifdef USE_ICU -#include <unicode/ustring.h> -#endif - #include "catalog/pg_collation.h" #include "catalog/pg_type.h" #include "common/int.h" diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index cdb4950ac47..e3ddec2d57d 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -54,6 +54,10 @@ #include <time.h> +#ifdef USE_ICU +#include <unicode/ucol.h> +#endif + #include "access/htup_details.h" #include "catalog/pg_collation.h" #include "catalog/pg_database.h" diff --git a/src/backend/utils/adt/pg_locale_icu.c b/src/backend/utils/adt/pg_locale_icu.c index 7bd58f26c44..0469c52b669 100644 --- a/src/backend/utils/adt/pg_locale_icu.c +++ b/src/backend/utils/adt/pg_locale_icu.c @@ -13,6 +13,7 @@ #ifdef USE_ICU #include <unicode/ucnv.h> +#include <unicode/ucol.h> #include <unicode/ustring.h> /* diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index 34796f2e27c..c57262e1888 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -17,6 +17,10 @@ #include <ctype.h> #include <limits.h> +#ifdef USE_ICU +#include <unicode/uchar.h> +#endif + #include "access/detoast.h" #include "access/toast_compression.h" #include "catalog/pg_collation.h" diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h index 74dd8435a6b..acb4890a78a 100644 --- a/src/include/utils/pg_locale.h +++ b/src/include/utils/pg_locale.h @@ -14,10 +14,6 @@ #include "mb/pg_wchar.h" -#ifdef USE_ICU -#include <unicode/ucol.h> -#endif - /* use for libc locale names */ #define LOCALE_NAME_BUFLEN 128 -- 2.34.1