On 08.08.24 22:00, Jeff Davis wrote:
On Wed, 2024-08-07 at 22:44 +0200, Peter Eisentraut wrote:
But after this patch set, locale cannot be NULL anymore, so the third
branch is obsolete.

...

Second, there are a number of functions in like.c like the above that
take separate arguments like pg_locale_t locale, bool locale_is_c.
Because pg_locale_t now contains the locale_is_c information, these
can
be combined.

I believe these patches are correct, but the reasoning is fairly
complex:

1. Some MatchText variants are called with 0 for locale. But that's OK
because ...

2. A MatchText variant only cares about the locale if MATCH_LOWER(t) is
defined, and ...

3. Only one variant, SB_IMatchText() defines MATCH_LOWER(), and ...

4. SB_IMatchText() is called with a non-zero locale.

All of these are a bit confusing to follow because it's generated code.
#2 is particularly non-obvious, because "locale" is not even an
argument of the MATCH_LOWER(t) or GETCHAR(t) macros, it's taken
implicitly from the outer scope.

I don't think your patches cause this confusion, but is there a way you
can clarify some of this along the way?

In the end, I figured the best thing to do here is to add an explicit locale argument to MATCH_LOWER() and GETCHAR() so one can actually understand this code by reading it. New patch attached.
From 53c5dd60ca8a7d9500624a430c7776bfa345a396 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut <pe...@eisentraut.org>
Date: Mon, 9 Sep 2024 15:33:25 +0200
Subject: [PATCH v2] Remove separate locale_is_c arguments

Since e9931bfb751, ctype_is_c is part of pg_locale_t.  Some functions
passed a pg_locale_t and a bool argument separately.  This can now be
combined into one argument.

Since some callers call MatchText() with locale 0, it is a bit
confusing whether this is all correct.  But it is the case that only
callers that pass a non-zero locale object to MatchText() end up
checking locale->ctype_is_c.  To make that flow a bit more
understandable, add the locale argument to MATCH_LOWER() and GETCHAR()
in like_match.c, instead of implicitly taking it from the outer scope.
---
 src/backend/utils/adt/like.c       | 30 +++++++++++++++---------------
 src/backend/utils/adt/like_match.c | 20 +++++++++-----------
 2 files changed, 24 insertions(+), 26 deletions(-)

diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c
index f87675d7557..9fb036ad014 100644
--- a/src/backend/utils/adt/like.c
+++ b/src/backend/utils/adt/like.c
@@ -33,18 +33,18 @@
 
 
 static int     SB_MatchText(const char *t, int tlen, const char *p, int plen,
-                                                pg_locale_t locale, bool 
locale_is_c);
+                                                pg_locale_t locale);
 static text *SB_do_like_escape(text *pat, text *esc);
 
 static int     MB_MatchText(const char *t, int tlen, const char *p, int plen,
-                                                pg_locale_t locale, bool 
locale_is_c);
+                                                pg_locale_t locale);
 static text *MB_do_like_escape(text *pat, text *esc);
 
 static int     UTF8_MatchText(const char *t, int tlen, const char *p, int plen,
-                                                  pg_locale_t locale, bool 
locale_is_c);
+                                                  pg_locale_t locale);
 
 static int     SB_IMatchText(const char *t, int tlen, const char *p, int plen,
-                                                 pg_locale_t locale, bool 
locale_is_c);
+                                                 pg_locale_t locale);
 
 static int     GenericMatchText(const char *s, int slen, const char *p, int 
plen, Oid collation);
 static int     Generic_Text_IC_like(text *str, text *pat, Oid collation);
@@ -91,9 +91,9 @@ wchareq(const char *p1, const char *p2)
  * fold-on-the-fly processing, however.
  */
 static char
-SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c)
+SB_lower_char(unsigned char c, pg_locale_t locale)
 {
-       if (locale_is_c)
+       if (locale->ctype_is_c)
                return pg_ascii_tolower(c);
        else
                return tolower_l(c, locale->info.lt);
@@ -129,7 +129,7 @@ SB_lower_char(unsigned char c, pg_locale_t locale, bool 
locale_is_c)
 #include "like_match.c"
 
 /* setup to compile like_match.c for single byte case insensitive matches */
-#define MATCH_LOWER(t) SB_lower_char((unsigned char) (t), locale, locale_is_c)
+#define MATCH_LOWER(t, locale) SB_lower_char((unsigned char) (t), locale)
 #define NextChar(p, plen) NextByte((p), (plen))
 #define MatchText SB_IMatchText
 
@@ -158,11 +158,11 @@ GenericMatchText(const char *s, int slen, const char *p, 
int plen, Oid collation
        }
 
        if (pg_database_encoding_max_length() == 1)
-               return SB_MatchText(s, slen, p, plen, 0, true);
+               return SB_MatchText(s, slen, p, plen, 0);
        else if (GetDatabaseEncoding() == PG_UTF8)
-               return UTF8_MatchText(s, slen, p, plen, 0, true);
+               return UTF8_MatchText(s, slen, p, plen, 0);
        else
-               return MB_MatchText(s, slen, p, plen, 0, true);
+               return MB_MatchText(s, slen, p, plen, 0);
 }
 
 static inline int
@@ -212,9 +212,9 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation)
                s = VARDATA_ANY(str);
                slen = VARSIZE_ANY_EXHDR(str);
                if (GetDatabaseEncoding() == PG_UTF8)
-                       return UTF8_MatchText(s, slen, p, plen, 0, true);
+                       return UTF8_MatchText(s, slen, p, plen, 0);
                else
-                       return MB_MatchText(s, slen, p, plen, 0, true);
+                       return MB_MatchText(s, slen, p, plen, 0);
        }
        else
        {
@@ -222,7 +222,7 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation)
                plen = VARSIZE_ANY_EXHDR(pat);
                s = VARDATA_ANY(str);
                slen = VARSIZE_ANY_EXHDR(str);
-               return SB_IMatchText(s, slen, p, plen, locale, 
locale->ctype_is_c);
+               return SB_IMatchText(s, slen, p, plen, locale);
        }
 }
 
@@ -330,7 +330,7 @@ bytealike(PG_FUNCTION_ARGS)
        p = VARDATA_ANY(pat);
        plen = VARSIZE_ANY_EXHDR(pat);
 
-       result = (SB_MatchText(s, slen, p, plen, 0, true) == LIKE_TRUE);
+       result = (SB_MatchText(s, slen, p, plen, 0) == LIKE_TRUE);
 
        PG_RETURN_BOOL(result);
 }
@@ -351,7 +351,7 @@ byteanlike(PG_FUNCTION_ARGS)
        p = VARDATA_ANY(pat);
        plen = VARSIZE_ANY_EXHDR(pat);
 
-       result = (SB_MatchText(s, slen, p, plen, 0, true) != LIKE_TRUE);
+       result = (SB_MatchText(s, slen, p, plen, 0) != LIKE_TRUE);
 
        PG_RETURN_BOOL(result);
 }
diff --git a/src/backend/utils/adt/like_match.c 
b/src/backend/utils/adt/like_match.c
index f2990edff7e..f561cc15e4c 100644
--- a/src/backend/utils/adt/like_match.c
+++ b/src/backend/utils/adt/like_match.c
@@ -71,14 +71,13 @@
  */
 
 #ifdef MATCH_LOWER
-#define GETCHAR(t) MATCH_LOWER(t)
+#define GETCHAR(t, locale) MATCH_LOWER(t, locale)
 #else
-#define GETCHAR(t) (t)
+#define GETCHAR(t, locale) (t)
 #endif
 
 static int
-MatchText(const char *t, int tlen, const char *p, int plen,
-                 pg_locale_t locale, bool locale_is_c)
+MatchText(const char *t, int tlen, const char *p, int plen, pg_locale_t locale)
 {
        /* Fast path for match-everything pattern */
        if (plen == 1 && *p == '%')
@@ -106,7 +105,7 @@ MatchText(const char *t, int tlen, const char *p, int plen,
                                ereport(ERROR,
                                                
(errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
                                                 errmsg("LIKE pattern must not 
end with escape character")));
-                       if (GETCHAR(*p) != GETCHAR(*t))
+                       if (GETCHAR(*p, locale) != GETCHAR(*t, locale))
                                return LIKE_FALSE;
                }
                else if (*p == '%')
@@ -166,17 +165,16 @@ MatchText(const char *t, int tlen, const char *p, int 
plen,
                                        ereport(ERROR,
                                                        
(errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
                                                         errmsg("LIKE pattern 
must not end with escape character")));
-                               firstpat = GETCHAR(p[1]);
+                               firstpat = GETCHAR(p[1], locale);
                        }
                        else
-                               firstpat = GETCHAR(*p);
+                               firstpat = GETCHAR(*p, locale);
 
                        while (tlen > 0)
                        {
-                               if (GETCHAR(*t) == firstpat)
+                               if (GETCHAR(*t, locale) == firstpat)
                                {
-                                       int                     matched = 
MatchText(t, tlen, p, plen,
-                                                                               
                        locale, locale_is_c);
+                                       int                     matched = 
MatchText(t, tlen, p, plen, locale);
 
                                        if (matched != LIKE_FALSE)
                                                return matched; /* TRUE or 
ABORT */
@@ -198,7 +196,7 @@ MatchText(const char *t, int tlen, const char *p, int plen,
                        NextByte(p, plen);
                        continue;
                }
-               else if (GETCHAR(*p) != GETCHAR(*t))
+               else if (GETCHAR(*p, locale) != GETCHAR(*t, locale))
                {
                        /* non-wildcard pattern char fails to match text char */
                        return LIKE_FALSE;
-- 
2.46.0

Reply via email to