On Tue, Aug 12, 2025 at 7:09 PM Amit Langote <[email protected]> wrote: > > Hi Jian, > > Thanks for the patch and also for the offlist heads-up. > > I agree with rejecting cases where the DEFAULT clause’s collation does not > match the RETURNING collation. The result collation for json_value should > come from the RETURNING clause if it has an explicit COLLATE, otherwise from > the RETURNING type’s collation, and both the extracted value source (the > value obtained from the JSON path when it matches) and the DEFAULT source > should match it. >
hi. based on my understand of https://www.postgresql.org/docs/current/collation.html#COLLATION-CONCEPTS <<<<<<< 1. If any input expression has an explicit collation derivation, then all explicitly derived collations among the input expressions must be the same, otherwise an error is raised. If any explicitly derived collation is present, that is the result of the collation combination. 2. Otherwise, all input expressions must have the same implicit collation derivation or the default collation. If any non-default collation is present, that is the result of the collation combination. Otherwise, the result is the default collation. <<<<<<< CREATE COLLATION case_insensitive (provider = icu, locale = 'und-u-ks-level2', deterministic = false); create domain d1 as text collate case_insensitive; create domain d2 as text collate "C"; the below two queries should error out: select json_value('{"a": "A"}', '$.a' returning d1 default 'C'::d2 on empty) = 'a'; --error select json_value('{"a": "A"}', '$.a' returning d1 default 'C' collate "C" on empty) = 'a'; --error please check attached patch.
From c8489365872309b166f40101835586247c137ba0 Mon Sep 17 00:00:00 2001 From: jian he <[email protected]> Date: Fri, 3 Oct 2025 20:53:48 +0800 Subject: [PATCH v2 1/1] fix SQL/JSON default expression collation issue create table t(a jsonb); select json_value(a, '$.c' returning text default 'A' collate "C" on empty) from t; As you can see, this query returns a set of rows. If the collation of the DEFAULT node differs from the default text collation, the resulting set may have inconsistent collations. As a result, the query's collation becomes unreliable. For instance, is it valid to create the following index in this case? create index xx on t (( json_value(a, '$.c' returning text default 'A' on empty) )); discussion: https://postgr.es/m/cacjufxhvwyysyivq6o+psrx6zq7rafinh_fv1kcftst1xg4...@mail.gmail.com --- src/backend/parser/parse_expr.c | 59 ++++++++++++++++--- .../regress/expected/sqljson_queryfuncs.out | 48 +++++++++++++++ src/test/regress/sql/sqljson_queryfuncs.sql | 15 +++++ 3 files changed, 114 insertions(+), 8 deletions(-) diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c index e1979a80c19..0296e92e108 100644 --- a/src/backend/parser/parse_expr.c +++ b/src/backend/parser/parse_expr.c @@ -93,7 +93,8 @@ static Node *transformJsonFuncExpr(ParseState *pstate, JsonFuncExpr *func); static void transformJsonPassingArgs(ParseState *pstate, const char *constructName, JsonFormatType format, List *args, List **passing_values, List **passing_names); -static JsonBehavior *transformJsonBehavior(ParseState *pstate, JsonBehavior *behavior, +static JsonBehavior *transformJsonBehavior(ParseState *pstate, JsonExpr *jsexpr, + JsonBehavior *behavior, JsonBehaviorType default_behavior, JsonReturning *returning); static Node *GetJsonBehaviorConst(JsonBehaviorType btype, int location); @@ -4528,13 +4529,16 @@ transformJsonFuncExpr(ParseState *pstate, JsonFuncExpr *func) { jsexpr->returning->typid = BOOLOID; jsexpr->returning->typmod = -1; + jsexpr->collation = InvalidOid; } /* JSON_TABLE() COLUMNS can specify a non-boolean type. */ if (jsexpr->returning->typid != BOOLOID) jsexpr->use_json_coercion = true; - jsexpr->on_error = transformJsonBehavior(pstate, func->on_error, + jsexpr->on_error = transformJsonBehavior(pstate, + jsexpr, + func->on_error, JSON_BEHAVIOR_FALSE, jsexpr->returning); break; @@ -4549,6 +4553,8 @@ transformJsonFuncExpr(ParseState *pstate, JsonFuncExpr *func) ret->typmod = -1; } + jsexpr->collation = get_typcollation(jsexpr->returning->typid); + /* * Keep quotes on scalar strings by default, omitting them only if * OMIT QUOTES is specified. @@ -4565,11 +4571,15 @@ transformJsonFuncExpr(ParseState *pstate, JsonFuncExpr *func) jsexpr->use_json_coercion = true; /* Assume NULL ON EMPTY when ON EMPTY is not specified. */ - jsexpr->on_empty = transformJsonBehavior(pstate, func->on_empty, + jsexpr->on_empty = transformJsonBehavior(pstate, + jsexpr, + func->on_empty, JSON_BEHAVIOR_NULL, jsexpr->returning); /* Assume NULL ON ERROR when ON ERROR is not specified. */ - jsexpr->on_error = transformJsonBehavior(pstate, func->on_error, + jsexpr->on_error = transformJsonBehavior(pstate, + jsexpr, + func->on_error, JSON_BEHAVIOR_NULL, jsexpr->returning); break; @@ -4581,6 +4591,7 @@ transformJsonFuncExpr(ParseState *pstate, JsonFuncExpr *func) jsexpr->returning->typid = TEXTOID; jsexpr->returning->typmod = -1; } + jsexpr->collation = get_typcollation(jsexpr->returning->typid); /* * Override whatever transformJsonOutput() set these to, which @@ -4606,11 +4617,15 @@ transformJsonFuncExpr(ParseState *pstate, JsonFuncExpr *func) } /* Assume NULL ON EMPTY when ON EMPTY is not specified. */ - jsexpr->on_empty = transformJsonBehavior(pstate, func->on_empty, + jsexpr->on_empty = transformJsonBehavior(pstate, + jsexpr, + func->on_empty, JSON_BEHAVIOR_NULL, jsexpr->returning); /* Assume NULL ON ERROR when ON ERROR is not specified. */ - jsexpr->on_error = transformJsonBehavior(pstate, func->on_error, + jsexpr->on_error = transformJsonBehavior(pstate, + jsexpr, + func->on_error, JSON_BEHAVIOR_NULL, jsexpr->returning); break; @@ -4621,6 +4636,7 @@ transformJsonFuncExpr(ParseState *pstate, JsonFuncExpr *func) jsexpr->returning->typid = exprType(jsexpr->formatted_expr); jsexpr->returning->typmod = -1; } + jsexpr->collation = get_typcollation(jsexpr->returning->typid); /* * Assume EMPTY ARRAY ON ERROR when ON ERROR is not specified. @@ -4628,7 +4644,9 @@ transformJsonFuncExpr(ParseState *pstate, JsonFuncExpr *func) * ON EMPTY cannot be specified at the top level but it can be for * the individual columns. */ - jsexpr->on_error = transformJsonBehavior(pstate, func->on_error, + jsexpr->on_error = transformJsonBehavior(pstate, + jsexpr, + func->on_error, JSON_BEHAVIOR_EMPTY_ARRAY, jsexpr->returning); break; @@ -4704,7 +4722,8 @@ ValidJsonBehaviorDefaultExpr(Node *expr, void *context) * Transform a JSON BEHAVIOR clause. */ static JsonBehavior * -transformJsonBehavior(ParseState *pstate, JsonBehavior *behavior, +transformJsonBehavior(ParseState *pstate, JsonExpr *jsexpr, + JsonBehavior *behavior, JsonBehaviorType default_behavior, JsonReturning *returning) { @@ -4712,6 +4731,7 @@ transformJsonBehavior(ParseState *pstate, JsonBehavior *behavior, Node *expr = NULL; bool coerce_at_runtime = false; int location = -1; + Oid exprcoll; if (behavior) { @@ -4720,6 +4740,29 @@ transformJsonBehavior(ParseState *pstate, JsonBehavior *behavior, if (btype == JSON_BEHAVIOR_DEFAULT) { expr = transformExprRecurse(pstate, behavior->expr); + + exprcoll = exprCollation(expr); + if (!OidIsValid(exprcoll)) + exprcoll = get_typcollation(exprType(expr)); + + if (jsexpr->collation != exprcoll && OidIsValid(exprcoll) && OidIsValid(jsexpr->collation)) + ereport(ERROR, + errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("the collation of DEFAULT expression conflicts with RETURNING caluse"), + errdetail("\"%s\" versus \"%s\"", + get_collation_name(exprcoll), + get_collation_name(jsexpr->collation)), + parser_errposition(pstate, exprLocation(expr))); + + /* + * Strip any top-level COLLATE clause because exprSetCollation can + * not cope with explit COLLATE clause. This is safe because we + * have already verified that the DEFAULT expression's explicit + * collation matches the RETURNING type's collation. + */ + while (IsA(expr, CollateExpr)) + expr = (Node *) ((CollateExpr *) expr)->arg; + if (!ValidJsonBehaviorDefaultExpr(expr, NULL)) ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), diff --git a/src/test/regress/expected/sqljson_queryfuncs.out b/src/test/regress/expected/sqljson_queryfuncs.out index 5a35aeb7bba..871d25e3287 100644 --- a/src/test/regress/expected/sqljson_queryfuncs.out +++ b/src/test/regress/expected/sqljson_queryfuncs.out @@ -1464,3 +1464,51 @@ SELECT JSON_VALUE(jsonb '1234', '$' RETURNING bit(3) DEFAULT 1::bit(3) ON ERROR SELECT JSON_VALUE(jsonb '"111"', '$.a' RETURNING bit(3) DEFAULT '1111' ON EMPTY); ERROR: bit string length 4 does not match type bit(3) DROP DOMAIN queryfuncs_d_varbit3; +CREATE COLLATION case_insensitive (provider = icu, locale = 'und-u-ks-level2', deterministic = false); +create domain d1 as text collate case_insensitive; +create domain d2 as text collate "C"; +select json_value('{"a": "A"}', '$.a' returning d1 default ('C' collate "C") collate case_insensitive on empty) = 'a'; --true + ?column? +---------- + t +(1 row) + +select json_value('{"a": "A"}', '$.a' returning d1 default 'C' on empty) = 'a'; --true + ?column? +---------- + t +(1 row) + +select json_value('{"a": "A"}', '$.a' returning d1 default 'C'::d2 on empty) = 'a'; --error +ERROR: the collation of DEFAULT expression conflicts with RETURNING caluse +LINE 1: ...on_value('{"a": "A"}', '$.a' returning d1 default 'C'::d2 on... + ^ +DETAIL: "C" versus "case_insensitive" +select json_value('{"a": "A"}', '$.a' returning d1 default 'C' collate "C" on empty) = 'a'; --error +ERROR: the collation of DEFAULT expression conflicts with RETURNING caluse +LINE 1: ...on_value('{"a": "A"}', '$.a' returning d1 default 'C' collat... + ^ +DETAIL: "C" versus "case_insensitive" +select json_value('{"a": "A"}', '$.c' returning d1 default 'A' on empty) = 'a'; --true + ?column? +---------- + t +(1 row) + +select json_value('{"a": "A"}', '$.c' returning d1 default 'A' collate case_insensitive on empty) = 'a'; --true + ?column? +---------- + t +(1 row) + +select json_value('{"a": "A"}', '$.c' returning d1 default 'A'::d2 on empty) = 'a'; --error +ERROR: the collation of DEFAULT expression conflicts with RETURNING caluse +LINE 1: ...on_value('{"a": "A"}', '$.c' returning d1 default 'A'::d2 on... + ^ +DETAIL: "C" versus "case_insensitive" +select json_value('{"a": "A"}', '$.c' returning d1 default 'A' collate "C" on empty) = 'a'; --error +ERROR: the collation of DEFAULT expression conflicts with RETURNING caluse +LINE 1: ...on_value('{"a": "A"}', '$.c' returning d1 default 'A' collat... + ^ +DETAIL: "C" versus "case_insensitive" +drop domain d1, d2; diff --git a/src/test/regress/sql/sqljson_queryfuncs.sql b/src/test/regress/sql/sqljson_queryfuncs.sql index 8d7b225b612..4c600f884b3 100644 --- a/src/test/regress/sql/sqljson_queryfuncs.sql +++ b/src/test/regress/sql/sqljson_queryfuncs.sql @@ -500,3 +500,18 @@ SELECT JSON_VALUE(jsonb '1234', '$' RETURNING bit(3) DEFAULT 1 ON ERROR); SELECT JSON_VALUE(jsonb '1234', '$' RETURNING bit(3) DEFAULT 1::bit(3) ON ERROR); SELECT JSON_VALUE(jsonb '"111"', '$.a' RETURNING bit(3) DEFAULT '1111' ON EMPTY); DROP DOMAIN queryfuncs_d_varbit3; + +CREATE COLLATION case_insensitive (provider = icu, locale = 'und-u-ks-level2', deterministic = false); +create domain d1 as text collate case_insensitive; +create domain d2 as text collate "C"; + +select json_value('{"a": "A"}', '$.a' returning d1 default ('C' collate "C") collate case_insensitive on empty) = 'a'; --true +select json_value('{"a": "A"}', '$.a' returning d1 default 'C' on empty) = 'a'; --true +select json_value('{"a": "A"}', '$.a' returning d1 default 'C'::d2 on empty) = 'a'; --error +select json_value('{"a": "A"}', '$.a' returning d1 default 'C' collate "C" on empty) = 'a'; --error + +select json_value('{"a": "A"}', '$.c' returning d1 default 'A' on empty) = 'a'; --true +select json_value('{"a": "A"}', '$.c' returning d1 default 'A' collate case_insensitive on empty) = 'a'; --true +select json_value('{"a": "A"}', '$.c' returning d1 default 'A'::d2 on empty) = 'a'; --error +select json_value('{"a": "A"}', '$.c' returning d1 default 'A' collate "C" on empty) = 'a'; --error +drop domain d1, d2; \ No newline at end of file -- 2.34.1
