On 11.01.22 12:08, Julien Rouhaud wrote:
So, unless there are concerns, I'm going to see about making a patch to call
pg_newlocale_from_collation() even with the default collation. That would
make the actual feature patch quite a bit smaller, since we won't have to
patch every call site of pg_newlocale_from_collation().
+1 for me!

Here is that patch.

If this is applied, then in my estimation all these hunks will completely disappear from the global ICU patch. So this would be a significant win.
From 2ee4b77221020e81ec83cf37d36e3955bf619d80 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut <pe...@eisentraut.org>
Date: Thu, 13 Jan 2022 09:14:41 +0100
Subject: [PATCH] Call pg_newlocale_from_collation() also with default
 collation

Previously, callers of pg_newlocale_from_collation() did not call it
if the collation was DEFAULT_COLLATION_OID and instead proceeded with
a pg_locale_t of 0.  Instead, now we call it anyway and have it return
0 if the default collation was passed.  It already did this, so we
just have to adjust the callers.  This simplifies all the call sites
and also makes future enhancements easier.

After discussion and testing, the previous comment in pg_locale.c
about avoiding this for performance reasons may have been mistaken
since it was testing a very different patch version way back when.

Discussion: 
https://www.postgresql.org/message-id/ed3baa81-7fac-7788-cc12-41e3f7917...@enterprisedb.com
---
 src/backend/access/hash/hashfunc.c   |  4 +-
 src/backend/regex/regc_pg_locale.c   | 40 ++++++------
 src/backend/utils/adt/formatting.c   | 96 +++++++++++++---------------
 src/backend/utils/adt/like.c         | 37 ++++++-----
 src/backend/utils/adt/like_support.c | 27 ++++----
 src/backend/utils/adt/pg_locale.c    |  3 -
 src/backend/utils/adt/varchar.c      | 26 +++++---
 src/backend/utils/adt/varlena.c      | 34 ++++++----
 8 files changed, 135 insertions(+), 132 deletions(-)

diff --git a/src/backend/access/hash/hashfunc.c 
b/src/backend/access/hash/hashfunc.c
index 0521c69dd5..b57ed946c4 100644
--- a/src/backend/access/hash/hashfunc.c
+++ b/src/backend/access/hash/hashfunc.c
@@ -278,7 +278,7 @@ hashtext(PG_FUNCTION_ARGS)
                                 errmsg("could not determine which collation to 
use for string hashing"),
                                 errhint("Use the COLLATE clause to set the 
collation explicitly.")));
 
-       if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
+       if (!lc_collate_is_c(collid))
                mylocale = pg_newlocale_from_collation(collid);
 
        if (!mylocale || mylocale->deterministic)
@@ -334,7 +334,7 @@ hashtextextended(PG_FUNCTION_ARGS)
                                 errmsg("could not determine which collation to 
use for string hashing"),
                                 errhint("Use the COLLATE clause to set the 
collation explicitly.")));
 
-       if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
+       if (!lc_collate_is_c(collid))
                mylocale = pg_newlocale_from_collation(collid);
 
        if (!mylocale || mylocale->deterministic)
diff --git a/src/backend/regex/regc_pg_locale.c 
b/src/backend/regex/regc_pg_locale.c
index e0d93eab32..6e84f42cb2 100644
--- a/src/backend/regex/regc_pg_locale.c
+++ b/src/backend/regex/regc_pg_locale.c
@@ -231,6 +231,18 @@ static const unsigned char pg_char_properties[128] = {
 void
 pg_set_regex_collation(Oid collation)
 {
+       if (!OidIsValid(collation))
+       {
+               /*
+                * This typically means that the parser could not resolve a
+                * conflict of implicit collations, so report it that way.
+                */
+               ereport(ERROR,
+                               (errcode(ERRCODE_INDETERMINATE_COLLATION),
+                                errmsg("could not determine which collation to 
use for regular expression"),
+                                errhint("Use the COLLATE clause to set the 
collation explicitly.")));
+       }
+
        if (lc_ctype_is_c(collation))
        {
                /* C/POSIX collations use this path regardless of database 
encoding */
@@ -240,28 +252,12 @@ pg_set_regex_collation(Oid collation)
        }
        else
        {
-               if (collation == DEFAULT_COLLATION_OID)
-                       pg_regex_locale = 0;
-               else if (OidIsValid(collation))
-               {
-                       /*
-                        * NB: pg_newlocale_from_collation will fail if not 
HAVE_LOCALE_T;
-                        * the case of pg_regex_locale != 0 but not 
HAVE_LOCALE_T does not
-                        * have to be considered below.
-                        */
-                       pg_regex_locale = 
pg_newlocale_from_collation(collation);
-               }
-               else
-               {
-                       /*
-                        * This typically means that the parser could not 
resolve a
-                        * conflict of implicit collations, so report it that 
way.
-                        */
-                       ereport(ERROR,
-                                       
(errcode(ERRCODE_INDETERMINATE_COLLATION),
-                                        errmsg("could not determine which 
collation to use for regular expression"),
-                                        errhint("Use the COLLATE clause to set 
the collation explicitly.")));
-               }
+               /*
+                * NB: pg_newlocale_from_collation will fail if not 
HAVE_LOCALE_T;
+                * the case of pg_regex_locale != 0 but not HAVE_LOCALE_T does 
not
+                * have to be considered below.
+                */
+               pg_regex_locale = pg_newlocale_from_collation(collation);
 
                if (pg_regex_locale && !pg_regex_locale->deterministic)
                        ereport(ERROR,
diff --git a/src/backend/utils/adt/formatting.c 
b/src/backend/utils/adt/formatting.c
index e8f996ac83..d4c2e7b069 100644
--- a/src/backend/utils/adt/formatting.c
+++ b/src/backend/utils/adt/formatting.c
@@ -1641,6 +1641,19 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
        if (!buff)
                return NULL;
 
+       if (!OidIsValid(collid))
+       {
+               /*
+                * This typically means that the parser could not resolve a
+                * conflict of implicit collations, so report it that way.
+                */
+               ereport(ERROR,
+                               (errcode(ERRCODE_INDETERMINATE_COLLATION),
+                                errmsg("could not determine which collation to 
use for %s function",
+                                               "lower()"),
+                                errhint("Use the COLLATE clause to set the 
collation explicitly.")));
+       }
+
        /* C/POSIX collations use this path regardless of database encoding */
        if (lc_ctype_is_c(collid))
        {
@@ -1648,24 +1661,9 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
        }
        else
        {
-               pg_locale_t mylocale = 0;
+               pg_locale_t mylocale;
 
-               if (collid != DEFAULT_COLLATION_OID)
-               {
-                       if (!OidIsValid(collid))
-                       {
-                               /*
-                                * This typically means that the parser could 
not resolve a
-                                * conflict of implicit collations, so report 
it that way.
-                                */
-                               ereport(ERROR,
-                                               
(errcode(ERRCODE_INDETERMINATE_COLLATION),
-                                                errmsg("could not determine 
which collation to use for %s function",
-                                                               "lower()"),
-                                                errhint("Use the COLLATE 
clause to set the collation explicitly.")));
-                       }
-                       mylocale = pg_newlocale_from_collation(collid);
-               }
+               mylocale = pg_newlocale_from_collation(collid);
 
 #ifdef USE_ICU
                if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
@@ -1765,6 +1763,19 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
        if (!buff)
                return NULL;
 
+       if (!OidIsValid(collid))
+       {
+               /*
+                * This typically means that the parser could not resolve a
+                * conflict of implicit collations, so report it that way.
+                */
+               ereport(ERROR,
+                               (errcode(ERRCODE_INDETERMINATE_COLLATION),
+                                errmsg("could not determine which collation to 
use for %s function",
+                                               "upper()"),
+                                errhint("Use the COLLATE clause to set the 
collation explicitly.")));
+       }
+
        /* C/POSIX collations use this path regardless of database encoding */
        if (lc_ctype_is_c(collid))
        {
@@ -1772,24 +1783,9 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
        }
        else
        {
-               pg_locale_t mylocale = 0;
+               pg_locale_t mylocale;
 
-               if (collid != DEFAULT_COLLATION_OID)
-               {
-                       if (!OidIsValid(collid))
-                       {
-                               /*
-                                * This typically means that the parser could 
not resolve a
-                                * conflict of implicit collations, so report 
it that way.
-                                */
-                               ereport(ERROR,
-                                               
(errcode(ERRCODE_INDETERMINATE_COLLATION),
-                                                errmsg("could not determine 
which collation to use for %s function",
-                                                               "upper()"),
-                                                errhint("Use the COLLATE 
clause to set the collation explicitly.")));
-                       }
-                       mylocale = pg_newlocale_from_collation(collid);
-               }
+               mylocale = pg_newlocale_from_collation(collid);
 
 #ifdef USE_ICU
                if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
@@ -1890,6 +1886,19 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
        if (!buff)
                return NULL;
 
+       if (!OidIsValid(collid))
+       {
+               /*
+                * This typically means that the parser could not resolve a
+                * conflict of implicit collations, so report it that way.
+                */
+               ereport(ERROR,
+                               (errcode(ERRCODE_INDETERMINATE_COLLATION),
+                                errmsg("could not determine which collation to 
use for %s function",
+                                               "initcap()"),
+                                errhint("Use the COLLATE clause to set the 
collation explicitly.")));
+       }
+
        /* C/POSIX collations use this path regardless of database encoding */
        if (lc_ctype_is_c(collid))
        {
@@ -1897,24 +1906,9 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
        }
        else
        {
-               pg_locale_t mylocale = 0;
+               pg_locale_t mylocale;
 
-               if (collid != DEFAULT_COLLATION_OID)
-               {
-                       if (!OidIsValid(collid))
-                       {
-                               /*
-                                * This typically means that the parser could 
not resolve a
-                                * conflict of implicit collations, so report 
it that way.
-                                */
-                               ereport(ERROR,
-                                               
(errcode(ERRCODE_INDETERMINATE_COLLATION),
-                                                errmsg("could not determine 
which collation to use for %s function",
-                                                               "initcap()"),
-                                                errhint("Use the COLLATE 
clause to set the collation explicitly.")));
-                       }
-                       mylocale = pg_newlocale_from_collation(collid);
-               }
+               mylocale = pg_newlocale_from_collation(collid);
 
 #ifdef USE_ICU
                if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c
index 9f241dc7c6..833ee8f814 100644
--- a/src/backend/utils/adt/like.c
+++ b/src/backend/utils/adt/like.c
@@ -150,7 +150,7 @@ SB_lower_char(unsigned char c, pg_locale_t locale, bool 
locale_is_c)
 static inline int
 GenericMatchText(const char *s, int slen, const char *p, int plen, Oid 
collation)
 {
-       if (collation && !lc_ctype_is_c(collation) && collation != 
DEFAULT_COLLATION_OID)
+       if (collation && !lc_ctype_is_c(collation))
        {
                pg_locale_t locale = pg_newlocale_from_collation(collation);
 
@@ -178,28 +178,27 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation)
        pg_locale_t locale = 0;
        bool            locale_is_c = false;
 
+       if (!OidIsValid(collation))
+       {
+               /*
+                * This typically means that the parser could not resolve a
+                * conflict of implicit collations, so report it that way.
+                */
+               ereport(ERROR,
+                               (errcode(ERRCODE_INDETERMINATE_COLLATION),
+                                errmsg("could not determine which collation to 
use for ILIKE"),
+                                errhint("Use the COLLATE clause to set the 
collation explicitly.")));
+       }
+
        if (lc_ctype_is_c(collation))
                locale_is_c = true;
-       else if (collation != DEFAULT_COLLATION_OID)
-       {
-               if (!OidIsValid(collation))
-               {
-                       /*
-                        * This typically means that the parser could not 
resolve a
-                        * conflict of implicit collations, so report it that 
way.
-                        */
-                       ereport(ERROR,
-                                       
(errcode(ERRCODE_INDETERMINATE_COLLATION),
-                                        errmsg("could not determine which 
collation to use for ILIKE"),
-                                        errhint("Use the COLLATE clause to set 
the collation explicitly.")));
-               }
+       else
                locale = pg_newlocale_from_collation(collation);
 
-               if (locale && !locale->deterministic)
-                       ereport(ERROR,
-                                       (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-                                        errmsg("nondeterministic collations 
are not supported for ILIKE")));
-       }
+       if (locale && !locale->deterministic)
+               ereport(ERROR,
+                               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                                errmsg("nondeterministic collations are not 
supported for ILIKE")));
 
        /*
         * For efficiency reasons, in the single byte case we don't call lower()
diff --git a/src/backend/utils/adt/like_support.c 
b/src/backend/utils/adt/like_support.c
index 7ca2a01e49..65a57fc3c4 100644
--- a/src/backend/utils/adt/like_support.c
+++ b/src/backend/utils/adt/like_support.c
@@ -1012,24 +1012,23 @@ like_fixed_prefix(Const *patt_const, bool 
case_insensitive, Oid collation,
                                        (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                                         errmsg("case insensitive matching not 
supported on type bytea")));
 
+               if (!OidIsValid(collation))
+               {
+                       /*
+                        * This typically means that the parser could not 
resolve a
+                        * conflict of implicit collations, so report it that 
way.
+                        */
+                       ereport(ERROR,
+                                       
(errcode(ERRCODE_INDETERMINATE_COLLATION),
+                                        errmsg("could not determine which 
collation to use for ILIKE"),
+                                        errhint("Use the COLLATE clause to set 
the collation explicitly.")));
+               }
+
                /* If case-insensitive, we need locale info */
                if (lc_ctype_is_c(collation))
                        locale_is_c = true;
-               else if (collation != DEFAULT_COLLATION_OID)
-               {
-                       if (!OidIsValid(collation))
-                       {
-                               /*
-                                * This typically means that the parser could 
not resolve a
-                                * conflict of implicit collations, so report 
it that way.
-                                */
-                               ereport(ERROR,
-                                               
(errcode(ERRCODE_INDETERMINATE_COLLATION),
-                                                errmsg("could not determine 
which collation to use for ILIKE"),
-                                                errhint("Use the COLLATE 
clause to set the collation explicitly.")));
-                       }
+               else
                        locale = pg_newlocale_from_collation(collation);
-               }
        }
 
        if (typeid != BYTEAOID)
diff --git a/src/backend/utils/adt/pg_locale.c 
b/src/backend/utils/adt/pg_locale.c
index 18f3afdc62..33cccc5c6c 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -1454,8 +1454,6 @@ report_newlocale_failure(const char *localename)
  *
  * As a special optimization, the default/database collation returns 0.
  * Callers should then revert to the non-locale_t-enabled code path.
- * In fact, they shouldn't call this function at all when they are dealing
- * with the default locale.  That can save quite a bit in hotspots.
  * Also, callers should avoid calling this before going down a C/POSIX
  * fastpath, because such a fastpath should work even on platforms without
  * locale_t support in the C library.
@@ -1472,7 +1470,6 @@ pg_newlocale_from_collation(Oid collid)
        /* Callers must pass a valid OID */
        Assert(OidIsValid(collid));
 
-       /* Return 0 for "default" collation, just in case caller forgets */
        if (collid == DEFAULT_COLLATION_OID)
                return (pg_locale_t) 0;
 
diff --git a/src/backend/utils/adt/varchar.c b/src/backend/utils/adt/varchar.c
index 95f768c884..8b5b30ed71 100644
--- a/src/backend/utils/adt/varchar.c
+++ b/src/backend/utils/adt/varchar.c
@@ -743,15 +743,20 @@ bpchareq(PG_FUNCTION_ARGS)
                                len2;
        bool            result;
        Oid                     collid = PG_GET_COLLATION();
+       bool            locale_is_c = false;
+       pg_locale_t     mylocale = 0;
 
        check_collation_set(collid);
 
        len1 = bcTruelen(arg1);
        len2 = bcTruelen(arg2);
 
-       if (lc_collate_is_c(collid) ||
-               collid == DEFAULT_COLLATION_OID ||
-               pg_newlocale_from_collation(collid)->deterministic)
+       if (lc_collate_is_c(collid))
+               locale_is_c = true;
+       else
+               mylocale = pg_newlocale_from_collation(collid);
+
+       if (locale_is_c || !mylocale || mylocale->deterministic)
        {
                /*
                 * Since we only care about equality or not-equality, we can 
avoid all
@@ -783,15 +788,20 @@ bpcharne(PG_FUNCTION_ARGS)
                                len2;
        bool            result;
        Oid                     collid = PG_GET_COLLATION();
+       bool            locale_is_c = false;
+       pg_locale_t     mylocale = 0;
 
        check_collation_set(collid);
 
        len1 = bcTruelen(arg1);
        len2 = bcTruelen(arg2);
 
-       if (lc_collate_is_c(collid) ||
-               collid == DEFAULT_COLLATION_OID ||
-               pg_newlocale_from_collation(collid)->deterministic)
+       if (lc_collate_is_c(collid))
+               locale_is_c = true;
+       else
+               mylocale = pg_newlocale_from_collation(collid);
+
+       if (locale_is_c || !mylocale || mylocale->deterministic)
        {
                /*
                 * Since we only care about equality or not-equality, we can 
avoid all
@@ -996,7 +1006,7 @@ hashbpchar(PG_FUNCTION_ARGS)
        keydata = VARDATA_ANY(key);
        keylen = bcTruelen(key);
 
-       if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
+       if (!lc_collate_is_c(collid))
                mylocale = pg_newlocale_from_collation(collid);
 
        if (!mylocale || mylocale->deterministic)
@@ -1056,7 +1066,7 @@ hashbpcharextended(PG_FUNCTION_ARGS)
        keydata = VARDATA_ANY(key);
        keylen = bcTruelen(key);
 
-       if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
+       if (!lc_collate_is_c(collid))
                mylocale = pg_newlocale_from_collation(collid);
 
        if (!mylocale || mylocale->deterministic)
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index b3eb39761d..a8db8080e2 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -1200,7 +1200,7 @@ text_position_setup(text *t1, text *t2, Oid collid, 
TextPositionState *state)
 
        check_collation_set(collid);
 
-       if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
+       if (!lc_collate_is_c(collid))
                mylocale = pg_newlocale_from_collation(collid);
 
        if (mylocale && !mylocale->deterministic)
@@ -1556,10 +1556,9 @@ varstr_cmp(const char *arg1, int len1, const char *arg2, 
int len2, Oid collid)
                char            a2buf[TEXTBUFLEN];
                char       *a1p,
                                   *a2p;
-               pg_locale_t mylocale = 0;
+               pg_locale_t mylocale;
 
-               if (collid != DEFAULT_COLLATION_OID)
-                       mylocale = pg_newlocale_from_collation(collid);
+               mylocale = pg_newlocale_from_collation(collid);
 
                /*
                 * memcmp() can't tell us which of two unequal strings sorts 
first,
@@ -1776,13 +1775,18 @@ Datum
 texteq(PG_FUNCTION_ARGS)
 {
        Oid                     collid = PG_GET_COLLATION();
+       bool            locale_is_c = false;
+       pg_locale_t     mylocale = 0;
        bool            result;
 
        check_collation_set(collid);
 
-       if (lc_collate_is_c(collid) ||
-               collid == DEFAULT_COLLATION_OID ||
-               pg_newlocale_from_collation(collid)->deterministic)
+       if (lc_collate_is_c(collid))
+               locale_is_c = true;
+       else
+               mylocale = pg_newlocale_from_collation(collid);
+
+       if (locale_is_c || !mylocale || mylocale->deterministic)
        {
                Datum           arg1 = PG_GETARG_DATUM(0);
                Datum           arg2 = PG_GETARG_DATUM(1);
@@ -1830,13 +1834,18 @@ Datum
 textne(PG_FUNCTION_ARGS)
 {
        Oid                     collid = PG_GET_COLLATION();
+       bool            locale_is_c = false;
+       pg_locale_t     mylocale = 0;
        bool            result;
 
        check_collation_set(collid);
 
-       if (lc_collate_is_c(collid) ||
-               collid == DEFAULT_COLLATION_OID ||
-               pg_newlocale_from_collation(collid)->deterministic)
+       if (lc_collate_is_c(collid))
+               locale_is_c = true;
+       else
+               mylocale = pg_newlocale_from_collation(collid);
+
+       if (locale_is_c || !mylocale || mylocale->deterministic)
        {
                Datum           arg1 = PG_GETARG_DATUM(0);
                Datum           arg2 = PG_GETARG_DATUM(1);
@@ -1947,7 +1956,7 @@ text_starts_with(PG_FUNCTION_ARGS)
 
        check_collation_set(collid);
 
-       if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
+       if (!lc_collate_is_c(collid))
                mylocale = pg_newlocale_from_collation(collid);
 
        if (mylocale && !mylocale->deterministic)
@@ -2061,8 +2070,7 @@ varstr_sortsupport(SortSupport ssup, Oid typid, Oid 
collid)
                 * we'll figure out the collation based on the locale id and 
cache the
                 * result.
                 */
-               if (collid != DEFAULT_COLLATION_OID)
-                       locale = pg_newlocale_from_collation(collid);
+               locale = pg_newlocale_from_collation(collid);
 
                /*
                 * There is a further exception on Windows.  When the database
-- 
2.34.1

Reply via email to