From 09e927b864a6d53447082fb5bf83164a8ccf3fac Mon Sep 17 00:00:00 2001
From: Amit Langote <amitlan@postgresql.org>
Date: Mon, 6 Oct 2025 09:17:51 -0400
Subject: [PATCH v3] Fix internal error from CollateExpr in SQL/JSON DEFAULT
 expressions

SQL/JSON functions such as JSON_VALUE could fail with "unrecognized
node type" errors when a DEFAULT clause contained an explicit COLLATE
expression. That happened because assign_collations_walker() could
invoke exprSetCollation() on a JsonBehavior expression whose DEFAULT
still contained a CollateExpr, which exprSetCollation() does not
handle.

For example:

  SELECT JSON_VALUE('{"a":1}', '$.c' RETURNING text
                    DEFAULT 'A' COLLATE "C" ON EMPTY);

Fix by validating the DEFAULT expression's collation against the
RETURNING collation and by stripping any top-level CollateExpr in
transformJsonBehavior(), ensuring no such node is left for later
collation assignment. If the DEFAULT collation differs from the
RETURNING collation, raise a clear error at parse time. This also
guarantees a single, deterministic result collation rather than one
that could vary at runtime.

Tests are placed in collation.icu.utf8 to avoid failures on
buildfarm machines without ICU.

Reported-by: Jian He <jian.universality@gmail.com>
Author: Jian He <jian.universality@gmail.com>
Reviewed-by: Amit Langote <amitlangote09@gmail.com>
Discussion: https://postgr.es/m/CACJufxHVwYYSyiVQ6o+PsRX6zQ7rAFinh_fv1kCfTsT1xG4Zeg@mail.gmail.com
---
 src/backend/parser/parse_expr.c               | 63 ++++++++++++++++---
 .../regress/expected/collate.icu.utf8.out     | 49 +++++++++++++++
 src/test/regress/sql/collate.icu.utf8.sql     | 13 ++++
 3 files changed, 117 insertions(+), 8 deletions(-)

diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c
index 9d95c7140ee..db3847fbc4d 100644
--- a/src/backend/parser/parse_expr.c
+++ b/src/backend/parser/parse_expr.c
@@ -94,7 +94,8 @@ static Node *transformJsonFuncExpr(ParseState *pstate, JsonFuncExpr *func);
 static void transformJsonPassingArgs(ParseState *pstate, const char *constructName,
 									 JsonFormatType format, List *args,
 									 List **passing_values, List **passing_names);
-static JsonBehavior *transformJsonBehavior(ParseState *pstate, JsonBehavior *behavior,
+static JsonBehavior *transformJsonBehavior(ParseState *pstate, JsonExpr *jsexpr,
+										   JsonBehavior *behavior,
 										   JsonBehaviorType default_behavior,
 										   JsonReturning *returning);
 static Node *GetJsonBehaviorConst(JsonBehaviorType btype, int location);
@@ -4529,13 +4530,16 @@ transformJsonFuncExpr(ParseState *pstate, JsonFuncExpr *func)
 			{
 				jsexpr->returning->typid = BOOLOID;
 				jsexpr->returning->typmod = -1;
+				jsexpr->collation = InvalidOid;
 			}
 
 			/* JSON_TABLE() COLUMNS can specify a non-boolean type. */
 			if (jsexpr->returning->typid != BOOLOID)
 				jsexpr->use_json_coercion = true;
 
-			jsexpr->on_error = transformJsonBehavior(pstate, func->on_error,
+			jsexpr->on_error = transformJsonBehavior(pstate,
+													 jsexpr,
+													 func->on_error,
 													 JSON_BEHAVIOR_FALSE,
 													 jsexpr->returning);
 			break;
@@ -4550,6 +4554,8 @@ transformJsonFuncExpr(ParseState *pstate, JsonFuncExpr *func)
 				ret->typmod = -1;
 			}
 
+			jsexpr->collation = get_typcollation(jsexpr->returning->typid);
+
 			/*
 			 * Keep quotes on scalar strings by default, omitting them only if
 			 * OMIT QUOTES is specified.
@@ -4566,11 +4572,15 @@ transformJsonFuncExpr(ParseState *pstate, JsonFuncExpr *func)
 				jsexpr->use_json_coercion = true;
 
 			/* Assume NULL ON EMPTY when ON EMPTY is not specified. */
-			jsexpr->on_empty = transformJsonBehavior(pstate, func->on_empty,
+			jsexpr->on_empty = transformJsonBehavior(pstate,
+													 jsexpr,
+													 func->on_empty,
 													 JSON_BEHAVIOR_NULL,
 													 jsexpr->returning);
 			/* Assume NULL ON ERROR when ON ERROR is not specified. */
-			jsexpr->on_error = transformJsonBehavior(pstate, func->on_error,
+			jsexpr->on_error = transformJsonBehavior(pstate,
+													 jsexpr,
+													 func->on_error,
 													 JSON_BEHAVIOR_NULL,
 													 jsexpr->returning);
 			break;
@@ -4582,6 +4592,7 @@ transformJsonFuncExpr(ParseState *pstate, JsonFuncExpr *func)
 				jsexpr->returning->typid = TEXTOID;
 				jsexpr->returning->typmod = -1;
 			}
+			jsexpr->collation = get_typcollation(jsexpr->returning->typid);
 
 			/*
 			 * Override whatever transformJsonOutput() set these to, which
@@ -4607,11 +4618,15 @@ transformJsonFuncExpr(ParseState *pstate, JsonFuncExpr *func)
 			}
 
 			/* Assume NULL ON EMPTY when ON EMPTY is not specified. */
-			jsexpr->on_empty = transformJsonBehavior(pstate, func->on_empty,
+			jsexpr->on_empty = transformJsonBehavior(pstate,
+													 jsexpr,
+													 func->on_empty,
 													 JSON_BEHAVIOR_NULL,
 													 jsexpr->returning);
 			/* Assume NULL ON ERROR when ON ERROR is not specified. */
-			jsexpr->on_error = transformJsonBehavior(pstate, func->on_error,
+			jsexpr->on_error = transformJsonBehavior(pstate,
+													 jsexpr,
+													 func->on_error,
 													 JSON_BEHAVIOR_NULL,
 													 jsexpr->returning);
 			break;
@@ -4622,6 +4637,7 @@ transformJsonFuncExpr(ParseState *pstate, JsonFuncExpr *func)
 				jsexpr->returning->typid = exprType(jsexpr->formatted_expr);
 				jsexpr->returning->typmod = -1;
 			}
+			jsexpr->collation = get_typcollation(jsexpr->returning->typid);
 
 			/*
 			 * Assume EMPTY ARRAY ON ERROR when ON ERROR is not specified.
@@ -4629,7 +4645,9 @@ transformJsonFuncExpr(ParseState *pstate, JsonFuncExpr *func)
 			 * ON EMPTY cannot be specified at the top level but it can be for
 			 * the individual columns.
 			 */
-			jsexpr->on_error = transformJsonBehavior(pstate, func->on_error,
+			jsexpr->on_error = transformJsonBehavior(pstate,
+													 jsexpr,
+													 func->on_error,
 													 JSON_BEHAVIOR_EMPTY_ARRAY,
 													 jsexpr->returning);
 			break;
@@ -4705,7 +4723,8 @@ ValidJsonBehaviorDefaultExpr(Node *expr, void *context)
  * Transform a JSON BEHAVIOR clause.
  */
 static JsonBehavior *
-transformJsonBehavior(ParseState *pstate, JsonBehavior *behavior,
+transformJsonBehavior(ParseState *pstate, JsonExpr *jsexpr,
+					  JsonBehavior *behavior,
 					  JsonBehaviorType default_behavior,
 					  JsonReturning *returning)
 {
@@ -4720,7 +4739,11 @@ transformJsonBehavior(ParseState *pstate, JsonBehavior *behavior,
 		location = behavior->location;
 		if (btype == JSON_BEHAVIOR_DEFAULT)
 		{
+			Oid			targetcoll = jsexpr->collation;
+			Oid			exprcoll;
+
 			expr = transformExprRecurse(pstate, behavior->expr);
+
 			if (!ValidJsonBehaviorDefaultExpr(expr, NULL))
 				ereport(ERROR,
 						(errcode(ERRCODE_DATATYPE_MISMATCH),
@@ -4736,6 +4759,30 @@ transformJsonBehavior(ParseState *pstate, JsonBehavior *behavior,
 						(errcode(ERRCODE_DATATYPE_MISMATCH),
 						 errmsg("DEFAULT expression must not return a set"),
 						 parser_errposition(pstate, exprLocation(expr))));
+
+			exprcoll = exprCollation(expr);
+			if (!OidIsValid(exprcoll))
+				exprcoll = get_typcollation(exprType(expr));
+			if (OidIsValid(targetcoll) && OidIsValid(exprcoll) &&
+				targetcoll != exprcoll)
+				ereport(ERROR,
+						errcode(ERRCODE_COLLATION_MISMATCH),
+						errmsg("the collation of DEFAULT expression conflicts with RETURNING clause"),
+						errdetail("\"%s\" versus \"%s\"",
+								  get_collation_name(exprcoll),
+								  get_collation_name(targetcoll)),
+						parser_errposition(pstate, exprLocation(expr)));
+
+			/*
+			 * Strip any top-level COLLATE from the DEFAULT expression. The
+			 * result collation of the enclosing JSON expression is
+			 * jsexpr->collation (chosen from RETURNING), and we have already
+			 * verified that any explicit COLLATE on DEFAULT equals that
+			 * target. Removing the wrapper avoids passing CollateExpr to
+			 * exprSetCollation() and does not change semantics.
+			 */
+			while (IsA(expr, CollateExpr))
+				expr = (Node *) ((CollateExpr *) expr)->arg;
 		}
 	}
 
diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out
index 69805d4b9ec..d385668afaf 100644
--- a/src/test/regress/expected/collate.icu.utf8.out
+++ b/src/test/regress/expected/collate.icu.utf8.out
@@ -2690,6 +2690,55 @@ SELECT * FROM t5 ORDER BY c ASC, a ASC;
  3 | d1 | d1
 (3 rows)
 
+-- Check that DEFAULT expressions in SQL/JSON functions use the same collation
+-- as the RETURNING type.  Mismatched collations should raise an error.
+CREATE DOMAIN d1 AS text COLLATE case_insensitive;
+CREATE DOMAIN d2 AS text COLLATE "C";
+SELECT JSON_VALUE('{"a": "A"}', '$.a' RETURNING d1 DEFAULT ('C' COLLATE "C") COLLATE case_insensitive ON EMPTY) = 'a'; -- true
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT JSON_VALUE('{"a": "A"}', '$.a' RETURNING d1 DEFAULT 'C' ON EMPTY) = 'a'; -- true
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT JSON_VALUE('{"a": "A"}', '$.a' RETURNING d1 DEFAULT 'C'::d2 ON EMPTY) = 'a'; -- error
+ERROR:  the collation of DEFAULT expression conflicts with RETURNING clause
+LINE 1: ...ON_VALUE('{"a": "A"}', '$.a' RETURNING d1 DEFAULT 'C'::d2 ON...
+                                                             ^
+DETAIL:  "C" versus "case_insensitive"
+SELECT JSON_VALUE('{"a": "A"}', '$.a' RETURNING d1 DEFAULT 'C' COLLATE "C" ON EMPTY) = 'a'; -- error
+ERROR:  the collation of DEFAULT expression conflicts with RETURNING clause
+LINE 1: ...ON_VALUE('{"a": "A"}', '$.a' RETURNING d1 DEFAULT 'C' COLLAT...
+                                                             ^
+DETAIL:  "C" versus "case_insensitive"
+SELECT JSON_VALUE('{"a": "A"}', '$.c' RETURNING d1 DEFAULT 'A' ON EMPTY) = 'a'; -- true
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT JSON_VALUE('{"a": "A"}', '$.c' RETURNING d1 DEFAULT 'A' COLLATE case_insensitive ON EMPTY) = 'a'; -- true
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT JSON_VALUE('{"a": "A"}', '$.c' RETURNING d1 DEFAULT 'A'::d2 ON EMPTY) = 'a'; -- error
+ERROR:  the collation of DEFAULT expression conflicts with RETURNING clause
+LINE 1: ...ON_VALUE('{"a": "A"}', '$.c' RETURNING d1 DEFAULT 'A'::d2 ON...
+                                                             ^
+DETAIL:  "C" versus "case_insensitive"
+SELECT JSON_VALUE('{"a": "A"}', '$.c' RETURNING d1 DEFAULT 'A' COLLATE "C" ON EMPTY) = 'a'; -- error
+ERROR:  the collation of DEFAULT expression conflicts with RETURNING clause
+LINE 1: ...ON_VALUE('{"a": "A"}', '$.c' RETURNING d1 DEFAULT 'A' COLLAT...
+                                                             ^
+DETAIL:  "C" versus "case_insensitive"
+DROP DOMAIN d1, d2;
 -- cleanup
 RESET search_path;
 SET client_min_messages TO warning;
diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql
index dbc190227d0..6f5abac0dc0 100644
--- a/src/test/regress/sql/collate.icu.utf8.sql
+++ b/src/test/regress/sql/collate.icu.utf8.sql
@@ -997,6 +997,19 @@ INSERT INTO t5 (a, b) values (1, 'D1'), (2, 'D2'), (3, 'd1');
 -- rewriting.)
 SELECT * FROM t5 ORDER BY c ASC, a ASC;
 
+-- Check that DEFAULT expressions in SQL/JSON functions use the same collation
+-- as the RETURNING type.  Mismatched collations should raise an error.
+CREATE DOMAIN d1 AS text COLLATE case_insensitive;
+CREATE DOMAIN d2 AS text COLLATE "C";
+SELECT JSON_VALUE('{"a": "A"}', '$.a' RETURNING d1 DEFAULT ('C' COLLATE "C") COLLATE case_insensitive ON EMPTY) = 'a'; -- true
+SELECT JSON_VALUE('{"a": "A"}', '$.a' RETURNING d1 DEFAULT 'C' ON EMPTY) = 'a'; -- true
+SELECT JSON_VALUE('{"a": "A"}', '$.a' RETURNING d1 DEFAULT 'C'::d2 ON EMPTY) = 'a'; -- error
+SELECT JSON_VALUE('{"a": "A"}', '$.a' RETURNING d1 DEFAULT 'C' COLLATE "C" ON EMPTY) = 'a'; -- error
+SELECT JSON_VALUE('{"a": "A"}', '$.c' RETURNING d1 DEFAULT 'A' ON EMPTY) = 'a'; -- true
+SELECT JSON_VALUE('{"a": "A"}', '$.c' RETURNING d1 DEFAULT 'A' COLLATE case_insensitive ON EMPTY) = 'a'; -- true
+SELECT JSON_VALUE('{"a": "A"}', '$.c' RETURNING d1 DEFAULT 'A'::d2 ON EMPTY) = 'a'; -- error
+SELECT JSON_VALUE('{"a": "A"}', '$.c' RETURNING d1 DEFAULT 'A' COLLATE "C" ON EMPTY) = 'a'; -- error
+DROP DOMAIN d1, d2;
 
 -- cleanup
 RESET search_path;
-- 
2.47.3

