From 7f3218da7d1206664522d41ff247b1e96815a757 Mon Sep 17 00:00:00 2001
From: "Chao Li (Evan)" <lic@highgo.com>
Date: Tue, 25 Nov 2025 13:40:13 +0800
Subject: [PATCH v2 2/2] Make libc-based case-folding functions match
 unicode_strlower()'s behavior
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The comments for unicode_strlower() state that the destination buffer is
filled with as many characters as will fit, truncating the result if needed;
the string is NUL-terminated only if there is sufficient space.  In contrast,
the libc variants (strlower_libc_sb(), strupper_libc_sb(), and
strtitle_libc_sb()) previously refused to copy anything unless the destination
was large enough for the full result plus the terminating NUL.

This patch updates all three libc-based functions to follow the same
“best effort copy + optional NUL terminator” model as unicode_strlower().
This ensures consistent behavior across all case-folding routines, regardless
of whether a Unicode locale is in use.

No existing regression tests required changes, and “make check” passes.

Author: Chao Li <lic@highgo.com>
Discussion: https://postgr.es/m/CAEoWx2mW0P8CByavV58zm3=eb2MQHaKOcDEF5B2UJYRyC2c3ig@mail.gmail.com
---
 src/backend/utils/adt/pg_locale_libc.c | 159 +++++++++++++------------
 1 file changed, 81 insertions(+), 78 deletions(-)

diff --git a/src/backend/utils/adt/pg_locale_libc.c b/src/backend/utils/adt/pg_locale_libc.c
index abf27283a33..c5ff2a0b681 100644
--- a/src/backend/utils/adt/pg_locale_libc.c
+++ b/src/backend/utils/adt/pg_locale_libc.c
@@ -426,36 +426,37 @@ static size_t
 strlower_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
 				 pg_locale_t locale)
 {
+	ssize_t		reallen;
+	locale_t	loc = locale->lt;
+
 	if (srclen < 0)
 		srclen = strlen(src);
 
-	if (srclen + 1 <= destsize)
-	{
-		locale_t	loc = locale->lt;
-		char	   *p;
+	reallen = srclen + 1;
+	if (reallen > destsize)
+		reallen = destsize;
 
-		memcpy(dest, src, srclen);
-		dest[srclen] = '\0';
+	memcpy(dest, src, reallen);
+	if (reallen < destsize)
+		dest[reallen] = '\0';
 
-		/*
-		 * Note: we assume that tolower_l() will not be so broken as to need
-		 * an isupper_l() guard test.  When using the default collation, we
-		 * apply the traditional Postgres behavior that forces ASCII-style
-		 * treatment of I/i, but in non-default collations you get exactly
-		 * what the collation says.
-		 */
-		for (p = dest; *p; p++)
+	/*
+	 * Note: we assume that tolower_l() will not be so broken as to need an
+	 * isupper_l() guard test.  When using the default collation, we apply the
+	 * traditional Postgres behavior that forces ASCII-style treatment of I/i,
+	 * but in non-default collations you get exactly what the collation says.
+	 */
+	for (char *p = dest; *p; p++)
+	{
+		if (locale->is_default)
 		{
-			if (locale->is_default)
-			{
-				if (*p >= 'A' && *p <= 'Z')
-					*p += 'a' - 'A';
-				else if (IS_HIGHBIT_SET(*p) && isupper_l(*p, loc))
-					*p = tolower_l((unsigned char) *p, loc);
-			}
-			else
+			if (*p >= 'A' && *p <= 'Z')
+				*p += 'a' - 'A';
+			else if (IS_HIGHBIT_SET(*p) && isupper_l(*p, loc))
 				*p = tolower_l((unsigned char) *p, loc);
 		}
+		else
+			*p = tolower_l((unsigned char) *p, loc);
 	}
 
 	return srclen;
@@ -513,53 +514,54 @@ static size_t
 strtitle_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
 				 pg_locale_t locale)
 {
+	ssize_t		reallen;
+	locale_t	loc = locale->lt;
+	int			wasalnum = false;
+
 	if (srclen < 0)
 		srclen = strlen(src);
 
-	if (srclen + 1 <= destsize)
-	{
-		locale_t	loc = locale->lt;
-		int			wasalnum = false;
-		char	   *p;
+	reallen = srclen + 1;
+	if (reallen > destsize)
+		reallen = destsize;
 
-		memcpy(dest, src, srclen);
-		dest[srclen] = '\0';
+	memcpy(dest, src, reallen);
+	if (reallen < destsize)
+		dest[reallen] = '\0';
 
-		/*
-		 * Note: we assume that toupper_l()/tolower_l() will not be so broken
-		 * as to need guard tests.  When using the default collation, we apply
-		 * the traditional Postgres behavior that forces ASCII-style treatment
-		 * of I/i, but in non-default collations you get exactly what the
-		 * collation says.
-		 */
-		for (p = dest; *p; p++)
+	/*
+	 * Note: we assume that toupper_l()/tolower_l() will not be so broken as
+	 * to need guard tests.  When using the default collation, we apply the
+	 * traditional Postgres behavior that forces ASCII-style treatment of I/i,
+	 * but in non-default collations you get exactly what the collation says.
+	 */
+	for (char *p = dest; *p; p++)
+	{
+		if (locale->is_default)
 		{
-			if (locale->is_default)
+			if (wasalnum)
 			{
-				if (wasalnum)
-				{
-					if (*p >= 'A' && *p <= 'Z')
-						*p += 'a' - 'A';
-					else if (IS_HIGHBIT_SET(*p) && isupper_l(*p, loc))
-						*p = tolower_l((unsigned char) *p, loc);
-				}
-				else
-				{
-					if (*p >= 'a' && *p <= 'z')
-						*p -= 'a' - 'A';
-					else if (IS_HIGHBIT_SET(*p) && islower_l(*p, loc))
-						*p = toupper_l((unsigned char) *p, loc);
-				}
+				if (*p >= 'A' && *p <= 'Z')
+					*p += 'a' - 'A';
+				else if (IS_HIGHBIT_SET(*p) && isupper_l(*p, loc))
+					*p = tolower_l((unsigned char) *p, loc);
 			}
 			else
 			{
-				if (wasalnum)
-					*p = tolower_l((unsigned char) *p, loc);
-				else
+				if (*p >= 'a' && *p <= 'z')
+					*p -= 'a' - 'A';
+				else if (IS_HIGHBIT_SET(*p) && islower_l(*p, loc))
 					*p = toupper_l((unsigned char) *p, loc);
 			}
-			wasalnum = isalnum_l((unsigned char) *p, loc);
 		}
+		else
+		{
+			if (wasalnum)
+				*p = tolower_l((unsigned char) *p, loc);
+			else
+				*p = toupper_l((unsigned char) *p, loc);
+		}
+		wasalnum = isalnum_l((unsigned char) *p, loc);
 	}
 
 	return srclen;
@@ -624,36 +626,37 @@ static size_t
 strupper_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
 				 pg_locale_t locale)
 {
+	ssize_t		reallen;
+	locale_t	loc = locale->lt;
+
 	if (srclen < 0)
 		srclen = strlen(src);
 
-	if (srclen + 1 <= destsize)
-	{
-		locale_t	loc = locale->lt;
-		char	   *p;
+	reallen = srclen + 1;
+	if (reallen > destsize)
+		reallen = destsize;
 
-		memcpy(dest, src, srclen);
-		dest[srclen] = '\0';
+	memcpy(dest, src, reallen);
+	if (reallen < destsize)
+		dest[reallen] = '\0';
 
-		/*
-		 * Note: we assume that toupper_l() will not be so broken as to need
-		 * an islower_l() guard test.  When using the default collation, we
-		 * apply the traditional Postgres behavior that forces ASCII-style
-		 * treatment of I/i, but in non-default collations you get exactly
-		 * what the collation says.
-		 */
-		for (p = dest; *p; p++)
+	/*
+	 * Note: we assume that toupper_l() will not be so broken as to need an
+	 * islower_l() guard test.  When using the default collation, we apply the
+	 * traditional Postgres behavior that forces ASCII-style treatment of I/i,
+	 * but in non-default collations you get exactly what the collation says.
+	 */
+	for (char *p = dest; *p; p++)
+	{
+		if (locale->is_default)
 		{
-			if (locale->is_default)
-			{
-				if (*p >= 'a' && *p <= 'z')
-					*p -= 'a' - 'A';
-				else if (IS_HIGHBIT_SET(*p) && islower_l(*p, loc))
-					*p = toupper_l((unsigned char) *p, loc);
-			}
-			else
+			if (*p >= 'a' && *p <= 'z')
+				*p -= 'a' - 'A';
+			else if (IS_HIGHBIT_SET(*p) && islower_l(*p, loc))
 				*p = toupper_l((unsigned char) *p, loc);
 		}
+		else
+			*p = toupper_l((unsigned char) *p, loc);
 	}
 
 	return srclen;
-- 
2.39.5 (Apple Git-154)

