On 06.08.24 23:40, Jeff Davis wrote:
> With these changes, collations are no longer dependent on the
> environment locale (setlocale()) at all for either collation behavior
> (ORDER BY) or ctype behavior (LOWER(), etc.).
> Additionally, unless I missed something, nothing in the server is
> dependent on LC_COLLATE at all.
> There are still some things that depend on setlocale() in one way or
> another:
> - char2wchar() & wchar2char()
> - ts_locale.c
> - various places that depend on LC_CTYPE unrelated to the collation
> infrastructure
> - things that depend on other locale settings, like LC_NUMERIC
> We can address those as part of a separate thread. I'll count this as
> committed.
I have a couple of small follow-up patches for this.
First, in like.c, SB_lower_char() now reads:
static char
SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c)
{
if (locale_is_c)
return pg_ascii_tolower(c);
else if (locale)
return tolower_l(c, locale->info.lt);
else
return pg_tolower(c);
}
But after this patch set, locale cannot be NULL anymore, so the third
branch is obsolete.
(Now that I look at it, pg_tolower() has some short-circuiting for ASCII
letters, so it would not handle Turkish-i correctly if that had been the
global locale. By removing the use of pg_tolower(), we fix that issue
in passing.)
Second, there are a number of functions in like.c like the above that
take separate arguments like pg_locale_t locale, bool locale_is_c.
Because pg_locale_t now contains the locale_is_c information, these can
be combined.

From 7de0b6c3069c5fbef7729b7a86e40b75dfb616fd Mon Sep 17 00:00:00 2001
From: Peter Eisentraut <pe...@eisentraut.org>
Date: Wed, 7 Aug 2024 22:24:39 +0200
Subject: [PATCH 1/2] Remove dead code
After e9931bfb751, the locale argument of SB_lower_char() is never
NULL, so the branch that deals with NULL can be removed.
---
src/backend/utils/adt/like.c | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c
index 0ecc96d48e5..bb4d73fb22e 100644
--- a/src/backend/utils/adt/like.c
+++ b/src/backend/utils/adt/like.c
@@ -95,10 +95,8 @@ SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c)
{
if (locale_is_c)
return pg_ascii_tolower(c);
- else if (locale)
- return tolower_l(c, locale->info.lt);
else
- return pg_tolower(c);
+ return tolower_l(c, locale->info.lt);
}
--
2.46.0
From 5d3b595fc08c3a013b5ebe532b3922bf9db7d963 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut <pe...@eisentraut.org>
Date: Wed, 7 Aug 2024 22:30:30 +0200
Subject: [PATCH 2/2] Remove separate locale_is_c arguments
Since e9931bfb751, ctype_is_c is part of pg_locale_t. Some functions
passed a pg_locale_t and a bool argument separately. This can now be
combined into one argument.
---
src/backend/utils/adt/like.c | 30 +++++++++++++++---------------
src/backend/utils/adt/like_match.c | 6 ++----
2 files changed, 17 insertions(+), 19 deletions(-)
diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c
index bb4d73fb22e..a3a1178887c 100644
--- a/src/backend/utils/adt/like.c
+++ b/src/backend/utils/adt/like.c
@@ -33,18 +33,18 @@
static int SB_MatchText(const char *t, int tlen, const char *p, int plen,
- pg_locale_t locale, bool locale_is_c);
+ pg_locale_t locale);
static text *SB_do_like_escape(text *pat, text *esc);
static int MB_MatchText(const char *t, int tlen, const char *p, int plen,
- pg_locale_t locale, bool locale_is_c);
+ pg_locale_t locale);
static text *MB_do_like_escape(text *pat, text *esc);
static int UTF8_MatchText(const char *t, int tlen, const char *p, int plen,
- pg_locale_t locale, bool locale_is_c);
+ pg_locale_t locale);
static int SB_IMatchText(const char *t, int tlen, const char *p, int plen,
- pg_locale_t locale, bool locale_is_c);
+ pg_locale_t locale);
static int GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation);
static int Generic_Text_IC_like(text *str, text *pat, Oid collation);
@@ -91,9 +91,9 @@ wchareq(const char *p1, const char *p2)
* fold-on-the-fly processing, however.
*/
static char
-SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c)
+SB_lower_char(unsigned char c, pg_locale_t locale)
{
- if (locale_is_c)
+ if (locale->ctype_is_c)
return pg_ascii_tolower(c);
else
return tolower_l(c, locale->info.lt);
@@ -129,7 +129,7 @@ SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c)
#include "like_match.c"
/* setup to compile like_match.c for single byte case insensitive matches */
-#define MATCH_LOWER(t) SB_lower_char((unsigned char) (t), locale, locale_is_c)
+#define MATCH_LOWER(t) SB_lower_char((unsigned char) (t), locale)
#define NextChar(p, plen) NextByte((p), (plen))
#define MatchText SB_IMatchText
@@ -158,11 +158,11 @@ GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation
}
if (pg_database_encoding_max_length() == 1)
- return SB_MatchText(s, slen, p, plen, 0, true);
+ return SB_MatchText(s, slen, p, plen, 0);
else if (GetDatabaseEncoding() == PG_UTF8)
- return UTF8_MatchText(s, slen, p, plen, 0, true);
+ return UTF8_MatchText(s, slen, p, plen, 0);
else
- return MB_MatchText(s, slen, p, plen, 0, true);
+ return MB_MatchText(s, slen, p, plen, 0);
}
static inline int
@@ -212,9 +212,9 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation)
s = VARDATA_ANY(str);
slen = VARSIZE_ANY_EXHDR(str);
if (GetDatabaseEncoding() == PG_UTF8)
- return UTF8_MatchText(s, slen, p, plen, 0, true);
+ return UTF8_MatchText(s, slen, p, plen, 0);
else
- return MB_MatchText(s, slen, p, plen, 0, true);
+ return MB_MatchText(s, slen, p, plen, 0);
}
else
{
@@ -222,7 +222,7 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation)
plen = VARSIZE_ANY_EXHDR(pat);
s = VARDATA_ANY(str);
slen = VARSIZE_ANY_EXHDR(str);
- return SB_IMatchText(s, slen, p, plen, locale, locale->ctype_is_c);
+ return SB_IMatchText(s, slen, p, plen, locale);
}
}
@@ -330,7 +330,7 @@ bytealike(PG_FUNCTION_ARGS)
p = VARDATA_ANY(pat);
plen = VARSIZE_ANY_EXHDR(pat);
- result = (SB_MatchText(s, slen, p, plen, 0, true) == LIKE_TRUE);
+ result = (SB_MatchText(s, slen, p, plen, 0) == LIKE_TRUE);
PG_RETURN_BOOL(result);
}
@@ -351,7 +351,7 @@ byteanlike(PG_FUNCTION_ARGS)
p = VARDATA_ANY(pat);
plen = VARSIZE_ANY_EXHDR(pat);
- result = (SB_MatchText(s, slen, p, plen, 0, true) != LIKE_TRUE);
+ result = (SB_MatchText(s, slen, p, plen, 0) != LIKE_TRUE);
PG_RETURN_BOOL(result);
}
diff --git a/src/backend/utils/adt/like_match.c b/src/backend/utils/adt/like_match.c
index f2990edff7e..928a1e99fc5 100644
--- a/src/backend/utils/adt/like_match.c
+++ b/src/backend/utils/adt/like_match.c
@@ -77,8 +77,7 @@
#endif
static int
-MatchText(const char *t, int tlen, const char *p, int plen,
- pg_locale_t locale, bool locale_is_c)
+MatchText(const char *t, int tlen, const char *p, int plen, pg_locale_t locale)
{
/* Fast path for match-everything pattern */
if (plen == 1 && *p == '%')
@@ -175,8 +174,7 @@ MatchText(const char *t, int tlen, const char *p, int plen,
{
if (GETCHAR(*t) == firstpat)
{
- int matched = MatchText(t, tlen, p, plen,
- locale, locale_is_c);
+ int matched = MatchText(t, tlen, p, plen, locale);
if (matched != LIKE_FALSE)
return matched; /* TRUE or ABORT */
--
2.46.0