On 11.01.22 12:08, Julien Rouhaud wrote:
So, unless there are concerns, I'm going to see about making a patch to call
pg_newlocale_from_collation() even with the default collation. That would
make the actual feature patch quite a bit smaller, since we won't have to
patch every call site of pg_newlocale_from_collation().
+1 for me!
Here is that patch.
If this is applied, then in my estimation all these hunks will
completely disappear from the global ICU patch. So this would be a
significant win.From 2ee4b77221020e81ec83cf37d36e3955bf619d80 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut <pe...@eisentraut.org>
Date: Thu, 13 Jan 2022 09:14:41 +0100
Subject: [PATCH] Call pg_newlocale_from_collation() also with default
collation
Previously, callers of pg_newlocale_from_collation() did not call it
if the collation was DEFAULT_COLLATION_OID and instead proceeded with
a pg_locale_t of 0. Instead, now we call it anyway and have it return
0 if the default collation was passed. It already did this, so we
just have to adjust the callers. This simplifies all the call sites
and also makes future enhancements easier.
After discussion and testing, the previous comment in pg_locale.c
about avoiding this for performance reasons may have been mistaken
since it was testing a very different patch version way back when.
Discussion:
https://www.postgresql.org/message-id/ed3baa81-7fac-7788-cc12-41e3f7917...@enterprisedb.com
---
src/backend/access/hash/hashfunc.c | 4 +-
src/backend/regex/regc_pg_locale.c | 40 ++++++------
src/backend/utils/adt/formatting.c | 96 +++++++++++++---------------
src/backend/utils/adt/like.c | 37 ++++++-----
src/backend/utils/adt/like_support.c | 27 ++++----
src/backend/utils/adt/pg_locale.c | 3 -
src/backend/utils/adt/varchar.c | 26 +++++---
src/backend/utils/adt/varlena.c | 34 ++++++----
8 files changed, 135 insertions(+), 132 deletions(-)
diff --git a/src/backend/access/hash/hashfunc.c
b/src/backend/access/hash/hashfunc.c
index 0521c69dd5..b57ed946c4 100644
--- a/src/backend/access/hash/hashfunc.c
+++ b/src/backend/access/hash/hashfunc.c
@@ -278,7 +278,7 @@ hashtext(PG_FUNCTION_ARGS)
errmsg("could not determine which collation to
use for string hashing"),
errhint("Use the COLLATE clause to set the
collation explicitly.")));
- if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
+ if (!lc_collate_is_c(collid))
mylocale = pg_newlocale_from_collation(collid);
if (!mylocale || mylocale->deterministic)
@@ -334,7 +334,7 @@ hashtextextended(PG_FUNCTION_ARGS)
errmsg("could not determine which collation to
use for string hashing"),
errhint("Use the COLLATE clause to set the
collation explicitly.")));
- if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
+ if (!lc_collate_is_c(collid))
mylocale = pg_newlocale_from_collation(collid);
if (!mylocale || mylocale->deterministic)
diff --git a/src/backend/regex/regc_pg_locale.c
b/src/backend/regex/regc_pg_locale.c
index e0d93eab32..6e84f42cb2 100644
--- a/src/backend/regex/regc_pg_locale.c
+++ b/src/backend/regex/regc_pg_locale.c
@@ -231,6 +231,18 @@ static const unsigned char pg_char_properties[128] = {
void
pg_set_regex_collation(Oid collation)
{
+ if (!OidIsValid(collation))
+ {
+ /*
+ * This typically means that the parser could not resolve a
+ * conflict of implicit collations, so report it that way.
+ */
+ ereport(ERROR,
+ (errcode(ERRCODE_INDETERMINATE_COLLATION),
+ errmsg("could not determine which collation to
use for regular expression"),
+ errhint("Use the COLLATE clause to set the
collation explicitly.")));
+ }
+
if (lc_ctype_is_c(collation))
{
/* C/POSIX collations use this path regardless of database
encoding */
@@ -240,28 +252,12 @@ pg_set_regex_collation(Oid collation)
}
else
{
- if (collation == DEFAULT_COLLATION_OID)
- pg_regex_locale = 0;
- else if (OidIsValid(collation))
- {
- /*
- * NB: pg_newlocale_from_collation will fail if not
HAVE_LOCALE_T;
- * the case of pg_regex_locale != 0 but not
HAVE_LOCALE_T does not
- * have to be considered below.
- */
- pg_regex_locale =
pg_newlocale_from_collation(collation);
- }
- else
- {
- /*
- * This typically means that the parser could not
resolve a
- * conflict of implicit collations, so report it that
way.
- */
- ereport(ERROR,
-
(errcode(ERRCODE_INDETERMINATE_COLLATION),
- errmsg("could not determine which
collation to use for regular expression"),
- errhint("Use the COLLATE clause to set
the collation explicitly.")));
- }
+ /*
+ * NB: pg_newlocale_from_collation will fail if not
HAVE_LOCALE_T;
+ * the case of pg_regex_locale != 0 but not HAVE_LOCALE_T does
not
+ * have to be considered below.
+ */
+ pg_regex_locale = pg_newlocale_from_collation(collation);
if (pg_regex_locale && !pg_regex_locale->deterministic)
ereport(ERROR,
diff --git a/src/backend/utils/adt/formatting.c
b/src/backend/utils/adt/formatting.c
index e8f996ac83..d4c2e7b069 100644
--- a/src/backend/utils/adt/formatting.c
+++ b/src/backend/utils/adt/formatting.c
@@ -1641,6 +1641,19 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
if (!buff)
return NULL;
+ if (!OidIsValid(collid))
+ {
+ /*
+ * This typically means that the parser could not resolve a
+ * conflict of implicit collations, so report it that way.
+ */
+ ereport(ERROR,
+ (errcode(ERRCODE_INDETERMINATE_COLLATION),
+ errmsg("could not determine which collation to
use for %s function",
+ "lower()"),
+ errhint("Use the COLLATE clause to set the
collation explicitly.")));
+ }
+
/* C/POSIX collations use this path regardless of database encoding */
if (lc_ctype_is_c(collid))
{
@@ -1648,24 +1661,9 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
}
else
{
- pg_locale_t mylocale = 0;
+ pg_locale_t mylocale;
- if (collid != DEFAULT_COLLATION_OID)
- {
- if (!OidIsValid(collid))
- {
- /*
- * This typically means that the parser could
not resolve a
- * conflict of implicit collations, so report
it that way.
- */
- ereport(ERROR,
-
(errcode(ERRCODE_INDETERMINATE_COLLATION),
- errmsg("could not determine
which collation to use for %s function",
- "lower()"),
- errhint("Use the COLLATE
clause to set the collation explicitly.")));
- }
- mylocale = pg_newlocale_from_collation(collid);
- }
+ mylocale = pg_newlocale_from_collation(collid);
#ifdef USE_ICU
if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
@@ -1765,6 +1763,19 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
if (!buff)
return NULL;
+ if (!OidIsValid(collid))
+ {
+ /*
+ * This typically means that the parser could not resolve a
+ * conflict of implicit collations, so report it that way.
+ */
+ ereport(ERROR,
+ (errcode(ERRCODE_INDETERMINATE_COLLATION),
+ errmsg("could not determine which collation to
use for %s function",
+ "upper()"),
+ errhint("Use the COLLATE clause to set the
collation explicitly.")));
+ }
+
/* C/POSIX collations use this path regardless of database encoding */
if (lc_ctype_is_c(collid))
{
@@ -1772,24 +1783,9 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
}
else
{
- pg_locale_t mylocale = 0;
+ pg_locale_t mylocale;
- if (collid != DEFAULT_COLLATION_OID)
- {
- if (!OidIsValid(collid))
- {
- /*
- * This typically means that the parser could
not resolve a
- * conflict of implicit collations, so report
it that way.
- */
- ereport(ERROR,
-
(errcode(ERRCODE_INDETERMINATE_COLLATION),
- errmsg("could not determine
which collation to use for %s function",
- "upper()"),
- errhint("Use the COLLATE
clause to set the collation explicitly.")));
- }
- mylocale = pg_newlocale_from_collation(collid);
- }
+ mylocale = pg_newlocale_from_collation(collid);
#ifdef USE_ICU
if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
@@ -1890,6 +1886,19 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
if (!buff)
return NULL;
+ if (!OidIsValid(collid))
+ {
+ /*
+ * This typically means that the parser could not resolve a
+ * conflict of implicit collations, so report it that way.
+ */
+ ereport(ERROR,
+ (errcode(ERRCODE_INDETERMINATE_COLLATION),
+ errmsg("could not determine which collation to
use for %s function",
+ "initcap()"),
+ errhint("Use the COLLATE clause to set the
collation explicitly.")));
+ }
+
/* C/POSIX collations use this path regardless of database encoding */
if (lc_ctype_is_c(collid))
{
@@ -1897,24 +1906,9 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
}
else
{
- pg_locale_t mylocale = 0;
+ pg_locale_t mylocale;
- if (collid != DEFAULT_COLLATION_OID)
- {
- if (!OidIsValid(collid))
- {
- /*
- * This typically means that the parser could
not resolve a
- * conflict of implicit collations, so report
it that way.
- */
- ereport(ERROR,
-
(errcode(ERRCODE_INDETERMINATE_COLLATION),
- errmsg("could not determine
which collation to use for %s function",
- "initcap()"),
- errhint("Use the COLLATE
clause to set the collation explicitly.")));
- }
- mylocale = pg_newlocale_from_collation(collid);
- }
+ mylocale = pg_newlocale_from_collation(collid);
#ifdef USE_ICU
if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c
index 9f241dc7c6..833ee8f814 100644
--- a/src/backend/utils/adt/like.c
+++ b/src/backend/utils/adt/like.c
@@ -150,7 +150,7 @@ SB_lower_char(unsigned char c, pg_locale_t locale, bool
locale_is_c)
static inline int
GenericMatchText(const char *s, int slen, const char *p, int plen, Oid
collation)
{
- if (collation && !lc_ctype_is_c(collation) && collation !=
DEFAULT_COLLATION_OID)
+ if (collation && !lc_ctype_is_c(collation))
{
pg_locale_t locale = pg_newlocale_from_collation(collation);
@@ -178,28 +178,27 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation)
pg_locale_t locale = 0;
bool locale_is_c = false;
+ if (!OidIsValid(collation))
+ {
+ /*
+ * This typically means that the parser could not resolve a
+ * conflict of implicit collations, so report it that way.
+ */
+ ereport(ERROR,
+ (errcode(ERRCODE_INDETERMINATE_COLLATION),
+ errmsg("could not determine which collation to
use for ILIKE"),
+ errhint("Use the COLLATE clause to set the
collation explicitly.")));
+ }
+
if (lc_ctype_is_c(collation))
locale_is_c = true;
- else if (collation != DEFAULT_COLLATION_OID)
- {
- if (!OidIsValid(collation))
- {
- /*
- * This typically means that the parser could not
resolve a
- * conflict of implicit collations, so report it that
way.
- */
- ereport(ERROR,
-
(errcode(ERRCODE_INDETERMINATE_COLLATION),
- errmsg("could not determine which
collation to use for ILIKE"),
- errhint("Use the COLLATE clause to set
the collation explicitly.")));
- }
+ else
locale = pg_newlocale_from_collation(collation);
- if (locale && !locale->deterministic)
- ereport(ERROR,
- (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("nondeterministic collations
are not supported for ILIKE")));
- }
+ if (locale && !locale->deterministic)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("nondeterministic collations are not
supported for ILIKE")));
/*
* For efficiency reasons, in the single byte case we don't call lower()
diff --git a/src/backend/utils/adt/like_support.c
b/src/backend/utils/adt/like_support.c
index 7ca2a01e49..65a57fc3c4 100644
--- a/src/backend/utils/adt/like_support.c
+++ b/src/backend/utils/adt/like_support.c
@@ -1012,24 +1012,23 @@ like_fixed_prefix(Const *patt_const, bool
case_insensitive, Oid collation,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("case insensitive matching not
supported on type bytea")));
+ if (!OidIsValid(collation))
+ {
+ /*
+ * This typically means that the parser could not
resolve a
+ * conflict of implicit collations, so report it that
way.
+ */
+ ereport(ERROR,
+
(errcode(ERRCODE_INDETERMINATE_COLLATION),
+ errmsg("could not determine which
collation to use for ILIKE"),
+ errhint("Use the COLLATE clause to set
the collation explicitly.")));
+ }
+
/* If case-insensitive, we need locale info */
if (lc_ctype_is_c(collation))
locale_is_c = true;
- else if (collation != DEFAULT_COLLATION_OID)
- {
- if (!OidIsValid(collation))
- {
- /*
- * This typically means that the parser could
not resolve a
- * conflict of implicit collations, so report
it that way.
- */
- ereport(ERROR,
-
(errcode(ERRCODE_INDETERMINATE_COLLATION),
- errmsg("could not determine
which collation to use for ILIKE"),
- errhint("Use the COLLATE
clause to set the collation explicitly.")));
- }
+ else
locale = pg_newlocale_from_collation(collation);
- }
}
if (typeid != BYTEAOID)
diff --git a/src/backend/utils/adt/pg_locale.c
b/src/backend/utils/adt/pg_locale.c
index 18f3afdc62..33cccc5c6c 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -1454,8 +1454,6 @@ report_newlocale_failure(const char *localename)
*
* As a special optimization, the default/database collation returns 0.
* Callers should then revert to the non-locale_t-enabled code path.
- * In fact, they shouldn't call this function at all when they are dealing
- * with the default locale. That can save quite a bit in hotspots.
* Also, callers should avoid calling this before going down a C/POSIX
* fastpath, because such a fastpath should work even on platforms without
* locale_t support in the C library.
@@ -1472,7 +1470,6 @@ pg_newlocale_from_collation(Oid collid)
/* Callers must pass a valid OID */
Assert(OidIsValid(collid));
- /* Return 0 for "default" collation, just in case caller forgets */
if (collid == DEFAULT_COLLATION_OID)
return (pg_locale_t) 0;
diff --git a/src/backend/utils/adt/varchar.c b/src/backend/utils/adt/varchar.c
index 95f768c884..8b5b30ed71 100644
--- a/src/backend/utils/adt/varchar.c
+++ b/src/backend/utils/adt/varchar.c
@@ -743,15 +743,20 @@ bpchareq(PG_FUNCTION_ARGS)
len2;
bool result;
Oid collid = PG_GET_COLLATION();
+ bool locale_is_c = false;
+ pg_locale_t mylocale = 0;
check_collation_set(collid);
len1 = bcTruelen(arg1);
len2 = bcTruelen(arg2);
- if (lc_collate_is_c(collid) ||
- collid == DEFAULT_COLLATION_OID ||
- pg_newlocale_from_collation(collid)->deterministic)
+ if (lc_collate_is_c(collid))
+ locale_is_c = true;
+ else
+ mylocale = pg_newlocale_from_collation(collid);
+
+ if (locale_is_c || !mylocale || mylocale->deterministic)
{
/*
* Since we only care about equality or not-equality, we can
avoid all
@@ -783,15 +788,20 @@ bpcharne(PG_FUNCTION_ARGS)
len2;
bool result;
Oid collid = PG_GET_COLLATION();
+ bool locale_is_c = false;
+ pg_locale_t mylocale = 0;
check_collation_set(collid);
len1 = bcTruelen(arg1);
len2 = bcTruelen(arg2);
- if (lc_collate_is_c(collid) ||
- collid == DEFAULT_COLLATION_OID ||
- pg_newlocale_from_collation(collid)->deterministic)
+ if (lc_collate_is_c(collid))
+ locale_is_c = true;
+ else
+ mylocale = pg_newlocale_from_collation(collid);
+
+ if (locale_is_c || !mylocale || mylocale->deterministic)
{
/*
* Since we only care about equality or not-equality, we can
avoid all
@@ -996,7 +1006,7 @@ hashbpchar(PG_FUNCTION_ARGS)
keydata = VARDATA_ANY(key);
keylen = bcTruelen(key);
- if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
+ if (!lc_collate_is_c(collid))
mylocale = pg_newlocale_from_collation(collid);
if (!mylocale || mylocale->deterministic)
@@ -1056,7 +1066,7 @@ hashbpcharextended(PG_FUNCTION_ARGS)
keydata = VARDATA_ANY(key);
keylen = bcTruelen(key);
- if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
+ if (!lc_collate_is_c(collid))
mylocale = pg_newlocale_from_collation(collid);
if (!mylocale || mylocale->deterministic)
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index b3eb39761d..a8db8080e2 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -1200,7 +1200,7 @@ text_position_setup(text *t1, text *t2, Oid collid,
TextPositionState *state)
check_collation_set(collid);
- if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
+ if (!lc_collate_is_c(collid))
mylocale = pg_newlocale_from_collation(collid);
if (mylocale && !mylocale->deterministic)
@@ -1556,10 +1556,9 @@ varstr_cmp(const char *arg1, int len1, const char *arg2,
int len2, Oid collid)
char a2buf[TEXTBUFLEN];
char *a1p,
*a2p;
- pg_locale_t mylocale = 0;
+ pg_locale_t mylocale;
- if (collid != DEFAULT_COLLATION_OID)
- mylocale = pg_newlocale_from_collation(collid);
+ mylocale = pg_newlocale_from_collation(collid);
/*
* memcmp() can't tell us which of two unequal strings sorts
first,
@@ -1776,13 +1775,18 @@ Datum
texteq(PG_FUNCTION_ARGS)
{
Oid collid = PG_GET_COLLATION();
+ bool locale_is_c = false;
+ pg_locale_t mylocale = 0;
bool result;
check_collation_set(collid);
- if (lc_collate_is_c(collid) ||
- collid == DEFAULT_COLLATION_OID ||
- pg_newlocale_from_collation(collid)->deterministic)
+ if (lc_collate_is_c(collid))
+ locale_is_c = true;
+ else
+ mylocale = pg_newlocale_from_collation(collid);
+
+ if (locale_is_c || !mylocale || mylocale->deterministic)
{
Datum arg1 = PG_GETARG_DATUM(0);
Datum arg2 = PG_GETARG_DATUM(1);
@@ -1830,13 +1834,18 @@ Datum
textne(PG_FUNCTION_ARGS)
{
Oid collid = PG_GET_COLLATION();
+ bool locale_is_c = false;
+ pg_locale_t mylocale = 0;
bool result;
check_collation_set(collid);
- if (lc_collate_is_c(collid) ||
- collid == DEFAULT_COLLATION_OID ||
- pg_newlocale_from_collation(collid)->deterministic)
+ if (lc_collate_is_c(collid))
+ locale_is_c = true;
+ else
+ mylocale = pg_newlocale_from_collation(collid);
+
+ if (locale_is_c || !mylocale || mylocale->deterministic)
{
Datum arg1 = PG_GETARG_DATUM(0);
Datum arg2 = PG_GETARG_DATUM(1);
@@ -1947,7 +1956,7 @@ text_starts_with(PG_FUNCTION_ARGS)
check_collation_set(collid);
- if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
+ if (!lc_collate_is_c(collid))
mylocale = pg_newlocale_from_collation(collid);
if (mylocale && !mylocale->deterministic)
@@ -2061,8 +2070,7 @@ varstr_sortsupport(SortSupport ssup, Oid typid, Oid
collid)
* we'll figure out the collation based on the locale id and
cache the
* result.
*/
- if (collid != DEFAULT_COLLATION_OID)
- locale = pg_newlocale_from_collation(collid);
+ locale = pg_newlocale_from_collation(collid);
/*
* There is a further exception on Windows. When the database
--
2.34.1