On 06.08.24 23:40, Jeff Davis wrote:
With these changes, collations are no longer dependent on the
environment locale (setlocale()) at all for either collation behavior
(ORDER BY) or ctype behavior (LOWER(), etc.).

Additionally, unless I missed something, nothing in the server is
dependent on LC_COLLATE at all.

There are still some things that depend on setlocale() in one way or
another:

   - char2wchar() & wchar2char()
   - ts_locale.c
   - various places that depend on LC_CTYPE unrelated to the collation
infrastructure
   - things that depend on other locale settings, like LC_NUMERIC

We can address those as part of a separate thread. I'll count this as
committed.

I have a couple of small follow-up patches for this.

First, in like.c, SB_lower_char() now reads:

static char
SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c)
{
    if (locale_is_c)
        return pg_ascii_tolower(c);
    else if (locale)
        return tolower_l(c, locale->info.lt);
    else
        return pg_tolower(c);
}

But after this patch set, locale cannot be NULL anymore, so the third branch is obsolete.

(Now that I look at it, pg_tolower() has some short-circuiting for ASCII letters, so it would not handle Turkish-i correctly if that had been the global locale. By removing the use of pg_tolower(), we fix that issue in passing.)

Second, several functions in like.c, such as the one above, take pg_locale_t locale and bool locale_is_c as separate arguments. Because pg_locale_t now carries that information itself (in its ctype_is_c field), the two can be combined into a single argument.
From 7de0b6c3069c5fbef7729b7a86e40b75dfb616fd Mon Sep 17 00:00:00 2001
From: Peter Eisentraut <pe...@eisentraut.org>
Date: Wed, 7 Aug 2024 22:24:39 +0200
Subject: [PATCH 1/2] Remove dead code

After e9931bfb751, the locale argument of SB_lower_char() is never
NULL, so the branch that deals with NULL can be removed.
---
 src/backend/utils/adt/like.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c
index 0ecc96d48e5..bb4d73fb22e 100644
--- a/src/backend/utils/adt/like.c
+++ b/src/backend/utils/adt/like.c
@@ -95,10 +95,8 @@ SB_lower_char(unsigned char c, pg_locale_t locale, bool 
locale_is_c)
 {
        if (locale_is_c)
                return pg_ascii_tolower(c);
-       else if (locale)
-               return tolower_l(c, locale->info.lt);
        else
-               return pg_tolower(c);
+               return tolower_l(c, locale->info.lt);
 }
 
 
-- 
2.46.0

From 5d3b595fc08c3a013b5ebe532b3922bf9db7d963 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut <pe...@eisentraut.org>
Date: Wed, 7 Aug 2024 22:30:30 +0200
Subject: [PATCH 2/2] Remove separate locale_is_c arguments

Since e9931bfb751, ctype_is_c is part of pg_locale_t.  Some functions
took a pg_locale_t and a separate bool argument.  These can now be
combined into one argument.
---
 src/backend/utils/adt/like.c       | 30 +++++++++++++++---------------
 src/backend/utils/adt/like_match.c |  6 ++----
 2 files changed, 17 insertions(+), 19 deletions(-)

diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c
index bb4d73fb22e..a3a1178887c 100644
--- a/src/backend/utils/adt/like.c
+++ b/src/backend/utils/adt/like.c
@@ -33,18 +33,18 @@
 
 
 static int     SB_MatchText(const char *t, int tlen, const char *p, int plen,
-                                                pg_locale_t locale, bool 
locale_is_c);
+                                                pg_locale_t locale);
 static text *SB_do_like_escape(text *pat, text *esc);
 
 static int     MB_MatchText(const char *t, int tlen, const char *p, int plen,
-                                                pg_locale_t locale, bool 
locale_is_c);
+                                                pg_locale_t locale);
 static text *MB_do_like_escape(text *pat, text *esc);
 
 static int     UTF8_MatchText(const char *t, int tlen, const char *p, int plen,
-                                                  pg_locale_t locale, bool 
locale_is_c);
+                                                  pg_locale_t locale);
 
 static int     SB_IMatchText(const char *t, int tlen, const char *p, int plen,
-                                                 pg_locale_t locale, bool 
locale_is_c);
+                                                 pg_locale_t locale);
 
 static int     GenericMatchText(const char *s, int slen, const char *p, int 
plen, Oid collation);
 static int     Generic_Text_IC_like(text *str, text *pat, Oid collation);
@@ -91,9 +91,9 @@ wchareq(const char *p1, const char *p2)
  * fold-on-the-fly processing, however.
  */
 static char
-SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c)
+SB_lower_char(unsigned char c, pg_locale_t locale)
 {
-       if (locale_is_c)
+       if (locale->ctype_is_c)
                return pg_ascii_tolower(c);
        else
                return tolower_l(c, locale->info.lt);
@@ -129,7 +129,7 @@ SB_lower_char(unsigned char c, pg_locale_t locale, bool 
locale_is_c)
 #include "like_match.c"
 
 /* setup to compile like_match.c for single byte case insensitive matches */
-#define MATCH_LOWER(t) SB_lower_char((unsigned char) (t), locale, locale_is_c)
+#define MATCH_LOWER(t) SB_lower_char((unsigned char) (t), locale)
 #define NextChar(p, plen) NextByte((p), (plen))
 #define MatchText SB_IMatchText
 
@@ -158,11 +158,11 @@ GenericMatchText(const char *s, int slen, const char *p, 
int plen, Oid collation
        }
 
        if (pg_database_encoding_max_length() == 1)
-               return SB_MatchText(s, slen, p, plen, 0, true);
+               return SB_MatchText(s, slen, p, plen, 0);
        else if (GetDatabaseEncoding() == PG_UTF8)
-               return UTF8_MatchText(s, slen, p, plen, 0, true);
+               return UTF8_MatchText(s, slen, p, plen, 0);
        else
-               return MB_MatchText(s, slen, p, plen, 0, true);
+               return MB_MatchText(s, slen, p, plen, 0);
 }
 
 static inline int
@@ -212,9 +212,9 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation)
                s = VARDATA_ANY(str);
                slen = VARSIZE_ANY_EXHDR(str);
                if (GetDatabaseEncoding() == PG_UTF8)
-                       return UTF8_MatchText(s, slen, p, plen, 0, true);
+                       return UTF8_MatchText(s, slen, p, plen, 0);
                else
-                       return MB_MatchText(s, slen, p, plen, 0, true);
+                       return MB_MatchText(s, slen, p, plen, 0);
        }
        else
        {
@@ -222,7 +222,7 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation)
                plen = VARSIZE_ANY_EXHDR(pat);
                s = VARDATA_ANY(str);
                slen = VARSIZE_ANY_EXHDR(str);
-               return SB_IMatchText(s, slen, p, plen, locale, 
locale->ctype_is_c);
+               return SB_IMatchText(s, slen, p, plen, locale);
        }
 }
 
@@ -330,7 +330,7 @@ bytealike(PG_FUNCTION_ARGS)
        p = VARDATA_ANY(pat);
        plen = VARSIZE_ANY_EXHDR(pat);
 
-       result = (SB_MatchText(s, slen, p, plen, 0, true) == LIKE_TRUE);
+       result = (SB_MatchText(s, slen, p, plen, 0) == LIKE_TRUE);
 
        PG_RETURN_BOOL(result);
 }
@@ -351,7 +351,7 @@ byteanlike(PG_FUNCTION_ARGS)
        p = VARDATA_ANY(pat);
        plen = VARSIZE_ANY_EXHDR(pat);
 
-       result = (SB_MatchText(s, slen, p, plen, 0, true) != LIKE_TRUE);
+       result = (SB_MatchText(s, slen, p, plen, 0) != LIKE_TRUE);
 
        PG_RETURN_BOOL(result);
 }
diff --git a/src/backend/utils/adt/like_match.c 
b/src/backend/utils/adt/like_match.c
index f2990edff7e..928a1e99fc5 100644
--- a/src/backend/utils/adt/like_match.c
+++ b/src/backend/utils/adt/like_match.c
@@ -77,8 +77,7 @@
 #endif
 
 static int
-MatchText(const char *t, int tlen, const char *p, int plen,
-                 pg_locale_t locale, bool locale_is_c)
+MatchText(const char *t, int tlen, const char *p, int plen, pg_locale_t locale)
 {
        /* Fast path for match-everything pattern */
        if (plen == 1 && *p == '%')
@@ -175,8 +174,7 @@ MatchText(const char *t, int tlen, const char *p, int plen,
                        {
                                if (GETCHAR(*t) == firstpat)
                                {
-                                       int                     matched = 
MatchText(t, tlen, p, plen,
-                                                                               
                        locale, locale_is_c);
+                                       int                     matched = 
MatchText(t, tlen, p, plen, locale);
 
                                        if (matched != LIKE_FALSE)
                                                return matched; /* TRUE or 
ABORT */
-- 
2.46.0

Reply via email to