On 08.08.24 22:00, Jeff Davis wrote:
On Wed, 2024-08-07 at 22:44 +0200, Peter Eisentraut wrote:
But after this patch set, locale cannot be NULL anymore, so the third
branch is obsolete.
...
Second, there are a number of functions in like.c like the above that
take separate arguments like pg_locale_t locale, bool locale_is_c.
Because pg_locale_t now contains the locale_is_c information, these
can
be combined.
I believe these patches are correct, but the reasoning is fairly
complex:
1. Some MatchText variants are called with 0 for locale. But that's OK
because ...
2. A MatchText variant only cares about the locale if MATCH_LOWER(t) is
defined, and ...
3. Only one variant, SB_IMatchText() defines MATCH_LOWER(), and ...
4. SB_IMatchText() is called with a non-zero locale.
All of these are a bit confusing to follow because it's generated code.
#2 is particularly non-obvious, because "locale" is not even an
argument of the MATCH_LOWER(t) or GETCHAR(t) macros, it's taken
implicitly from the outer scope.
I don't think your patches cause this confusion, but is there a way you
can clarify some of this along the way?
In the end, I figured the best thing to do here is to add an explicit
locale argument to MATCH_LOWER() and GETCHAR() so one can actually
understand this code by reading it. New patch attached.
From 53c5dd60ca8a7d9500624a430c7776bfa345a396 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut <pe...@eisentraut.org>
Date: Mon, 9 Sep 2024 15:33:25 +0200
Subject: [PATCH v2] Remove separate locale_is_c arguments
Since e9931bfb751, ctype_is_c is part of pg_locale_t. Some functions
passed a pg_locale_t and a bool argument separately. This can now be
combined into one argument.
Since some callers call MatchText() with locale 0, it is a bit
confusing whether this is all correct. But it is the case that only
callers that pass a non-zero locale object to MatchText() end up
checking locale->ctype_is_c. To make that flow a bit more
understandable, add the locale argument to MATCH_LOWER() and GETCHAR()
in like_match.c, instead of implicitly taking it from the outer scope.
---
src/backend/utils/adt/like.c | 30 +++++++++++++++---------------
src/backend/utils/adt/like_match.c | 20 +++++++++-----------
2 files changed, 24 insertions(+), 26 deletions(-)
diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c
index f87675d7557..9fb036ad014 100644
--- a/src/backend/utils/adt/like.c
+++ b/src/backend/utils/adt/like.c
@@ -33,18 +33,18 @@
static int SB_MatchText(const char *t, int tlen, const char *p, int plen,
- pg_locale_t locale, bool
locale_is_c);
+ pg_locale_t locale);
static text *SB_do_like_escape(text *pat, text *esc);
static int MB_MatchText(const char *t, int tlen, const char *p, int plen,
- pg_locale_t locale, bool
locale_is_c);
+ pg_locale_t locale);
static text *MB_do_like_escape(text *pat, text *esc);
static int UTF8_MatchText(const char *t, int tlen, const char *p, int plen,
- pg_locale_t locale, bool
locale_is_c);
+ pg_locale_t locale);
static int SB_IMatchText(const char *t, int tlen, const char *p, int plen,
- pg_locale_t locale, bool
locale_is_c);
+ pg_locale_t locale);
static int GenericMatchText(const char *s, int slen, const char *p, int
plen, Oid collation);
static int Generic_Text_IC_like(text *str, text *pat, Oid collation);
@@ -91,9 +91,9 @@ wchareq(const char *p1, const char *p2)
* fold-on-the-fly processing, however.
*/
static char
-SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c)
+SB_lower_char(unsigned char c, pg_locale_t locale)
{
- if (locale_is_c)
+ if (locale->ctype_is_c)
return pg_ascii_tolower(c);
else
return tolower_l(c, locale->info.lt);
@@ -129,7 +129,7 @@ SB_lower_char(unsigned char c, pg_locale_t locale, bool
locale_is_c)
#include "like_match.c"
/* setup to compile like_match.c for single byte case insensitive matches */
-#define MATCH_LOWER(t) SB_lower_char((unsigned char) (t), locale, locale_is_c)
+#define MATCH_LOWER(t, locale) SB_lower_char((unsigned char) (t), locale)
#define NextChar(p, plen) NextByte((p), (plen))
#define MatchText SB_IMatchText
@@ -158,11 +158,11 @@ GenericMatchText(const char *s, int slen, const char *p,
int plen, Oid collation
}
if (pg_database_encoding_max_length() == 1)
- return SB_MatchText(s, slen, p, plen, 0, true);
+ return SB_MatchText(s, slen, p, plen, 0);
else if (GetDatabaseEncoding() == PG_UTF8)
- return UTF8_MatchText(s, slen, p, plen, 0, true);
+ return UTF8_MatchText(s, slen, p, plen, 0);
else
- return MB_MatchText(s, slen, p, plen, 0, true);
+ return MB_MatchText(s, slen, p, plen, 0);
}
static inline int
@@ -212,9 +212,9 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation)
s = VARDATA_ANY(str);
slen = VARSIZE_ANY_EXHDR(str);
if (GetDatabaseEncoding() == PG_UTF8)
- return UTF8_MatchText(s, slen, p, plen, 0, true);
+ return UTF8_MatchText(s, slen, p, plen, 0);
else
- return MB_MatchText(s, slen, p, plen, 0, true);
+ return MB_MatchText(s, slen, p, plen, 0);
}
else
{
@@ -222,7 +222,7 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation)
plen = VARSIZE_ANY_EXHDR(pat);
s = VARDATA_ANY(str);
slen = VARSIZE_ANY_EXHDR(str);
- return SB_IMatchText(s, slen, p, plen, locale,
locale->ctype_is_c);
+ return SB_IMatchText(s, slen, p, plen, locale);
}
}
@@ -330,7 +330,7 @@ bytealike(PG_FUNCTION_ARGS)
p = VARDATA_ANY(pat);
plen = VARSIZE_ANY_EXHDR(pat);
- result = (SB_MatchText(s, slen, p, plen, 0, true) == LIKE_TRUE);
+ result = (SB_MatchText(s, slen, p, plen, 0) == LIKE_TRUE);
PG_RETURN_BOOL(result);
}
@@ -351,7 +351,7 @@ byteanlike(PG_FUNCTION_ARGS)
p = VARDATA_ANY(pat);
plen = VARSIZE_ANY_EXHDR(pat);
- result = (SB_MatchText(s, slen, p, plen, 0, true) != LIKE_TRUE);
+ result = (SB_MatchText(s, slen, p, plen, 0) != LIKE_TRUE);
PG_RETURN_BOOL(result);
}
diff --git a/src/backend/utils/adt/like_match.c
b/src/backend/utils/adt/like_match.c
index f2990edff7e..f561cc15e4c 100644
--- a/src/backend/utils/adt/like_match.c
+++ b/src/backend/utils/adt/like_match.c
@@ -71,14 +71,13 @@
*/
#ifdef MATCH_LOWER
-#define GETCHAR(t) MATCH_LOWER(t)
+#define GETCHAR(t, locale) MATCH_LOWER(t, locale)
#else
-#define GETCHAR(t) (t)
+#define GETCHAR(t, locale) (t)
#endif
static int
-MatchText(const char *t, int tlen, const char *p, int plen,
- pg_locale_t locale, bool locale_is_c)
+MatchText(const char *t, int tlen, const char *p, int plen, pg_locale_t locale)
{
/* Fast path for match-everything pattern */
if (plen == 1 && *p == '%')
@@ -106,7 +105,7 @@ MatchText(const char *t, int tlen, const char *p, int plen,
ereport(ERROR,
(errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
errmsg("LIKE pattern must not
end with escape character")));
- if (GETCHAR(*p) != GETCHAR(*t))
+ if (GETCHAR(*p, locale) != GETCHAR(*t, locale))
return LIKE_FALSE;
}
else if (*p == '%')
@@ -166,17 +165,16 @@ MatchText(const char *t, int tlen, const char *p, int
plen,
ereport(ERROR,
(errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
errmsg("LIKE pattern
must not end with escape character")));
- firstpat = GETCHAR(p[1]);
+ firstpat = GETCHAR(p[1], locale);
}
else
- firstpat = GETCHAR(*p);
+ firstpat = GETCHAR(*p, locale);
while (tlen > 0)
{
- if (GETCHAR(*t) == firstpat)
+ if (GETCHAR(*t, locale) == firstpat)
{
- int matched =
MatchText(t, tlen, p, plen,
-
locale, locale_is_c);
+ int matched =
MatchText(t, tlen, p, plen, locale);
if (matched != LIKE_FALSE)
return matched; /* TRUE or
ABORT */
@@ -198,7 +196,7 @@ MatchText(const char *t, int tlen, const char *p, int plen,
NextByte(p, plen);
continue;
}
- else if (GETCHAR(*p) != GETCHAR(*t))
+ else if (GETCHAR(*p, locale) != GETCHAR(*t, locale))
{
/* non-wildcard pattern char fails to match text char */
return LIKE_FALSE;
--
2.46.0