Hi, hackers!
I found a problem with selectivity estimation for NULL-returning operators. matchingsel() is not ready to use as a restriction selectivity estimator for operators like our jsonpath operators @? and @@, because it calls operator function on values obtained from pg_statistic through plain FunctionCall2Coll() which does not accept NULL results (see mcv_selectivity() etc.). =# CREATE TABLE test AS SELECT '{}'::jsonb js FROM generate_series(1, 1000); =# ANALYZE test; =# SELECT * FROM test WHERE js @@ '$ == 1'; ERROR: function 4011 returned NULL I'm not sure what we should to fix: operators or matchingsel(). So, attached two possible independent fixes: 1. Return FALSE instead of NULL in jsonpath operators. The corresponding functions jsonb_path_exists() and jsonb_path_match() still return NULL in error cases. 2. Fix NULL operator results in selectivity estimation functions. Introduced BoolFunctionCall2Coll() for replacing NULL with FALSE, that is used for calling non-comparison operators (I'm not sure that comparison can return NULLs). Maybe it is worth add a whole set of functions to fmgr.c for replacing NULL results with the specified default Datum value. If the selectivity estimation code will be left unchanged, then I think it should be noted in documentation that matchingsel() is not applicable to NULL-returning operators (there is already a similar note about hash-joinable operators). But if we will fix NULL handling, I think it would be worth to fix it everywhere in the selectivity estimation code. Without this, completely wrong results can be get not only for NULL values, but also for NULL operator results: =# EXPLAIN SELECT * FROM test WHERE NOT js @@ '$ == 1'; -- 0 rows returned QUERY PLAN -------------------------------------------------------- Seq Scan on test (cost=0.00..17.50 rows=1000 width=5) Filter: (NOT (js @@ '($ == 1)'::jsonpath)) (2 rows) -- Nikita Glukhov Postgres Professional: http://www.postgrespro.com The Russian Postgres Company
>From 92ff39dca968c5b0c7ac31b0b495780c9a9482a7 Mon Sep 17 00:00:00 2001 From: Nikita Glukhov <n.glu...@postgrespro.ru> Date: Fri, 17 Apr 2020 18:05:48 +0300 Subject: [PATCH] Return FALSE instead of NULL in jsonpath operators --- src/backend/utils/adt/jsonpath_exec.c | 25 ++++++++++++++----------- src/test/regress/expected/jsonb_jsonpath.out | 28 ++++++++++++++-------------- 2 files changed, 28 insertions(+), 25 deletions(-) diff --git a/src/backend/utils/adt/jsonpath_exec.c b/src/backend/utils/adt/jsonpath_exec.c index bc06306..2c216ff 100644 --- a/src/backend/utils/adt/jsonpath_exec.c +++ b/src/backend/utils/adt/jsonpath_exec.c @@ -269,7 +269,7 @@ static int compareDatetime(Datum val1, Oid typid1, Datum val2, Oid typid2, * an analogy in SQL/JSON, so we define its behavior on our own. */ static Datum -jsonb_path_exists_internal(FunctionCallInfo fcinfo, bool tz) +jsonb_path_exists_internal(FunctionCallInfo fcinfo, bool tz, bool null_on_error) { Jsonb *jb = PG_GETARG_JSONB_P(0); JsonPath *jp = PG_GETARG_JSONPATH_P(1); @@ -288,7 +288,7 @@ jsonb_path_exists_internal(FunctionCallInfo fcinfo, bool tz) PG_FREE_IF_COPY(jb, 0); PG_FREE_IF_COPY(jp, 1); - if (jperIsError(res)) + if (jperIsError(res) && null_on_error) PG_RETURN_NULL(); PG_RETURN_BOOL(res == jperOk); @@ -297,13 +297,13 @@ jsonb_path_exists_internal(FunctionCallInfo fcinfo, bool tz) Datum jsonb_path_exists(PG_FUNCTION_ARGS) { - return jsonb_path_exists_internal(fcinfo, false); + return jsonb_path_exists_internal(fcinfo, false, true); } Datum jsonb_path_exists_tz(PG_FUNCTION_ARGS) { - return jsonb_path_exists_internal(fcinfo, true); + return jsonb_path_exists_internal(fcinfo, true, true); } /* @@ -315,7 +315,7 @@ Datum jsonb_path_exists_opr(PG_FUNCTION_ARGS) { /* just call the other one -- it can handle both cases */ - return jsonb_path_exists_internal(fcinfo, false); + return jsonb_path_exists_internal(fcinfo, false, false); } /* @@ -324,7 +324,7 @@ jsonb_path_exists_opr(PG_FUNCTION_ARGS) * See jsonb_path_exists() comment for details regarding error handling. */ static Datum -jsonb_path_match_internal(FunctionCallInfo fcinfo, bool tz) +jsonb_path_match_internal(FunctionCallInfo fcinfo, bool tz, bool null_on_error) { Jsonb *jb = PG_GETARG_JSONB_P(0); JsonPath *jp = PG_GETARG_JSONPATH_P(1); @@ -351,7 +351,7 @@ jsonb_path_match_internal(FunctionCallInfo fcinfo, bool tz) PG_RETURN_BOOL(jbv->val.boolean); if (jbv->type == jbvNull) - PG_RETURN_NULL(); + silent = true; /* supress errors and return NULL/FALSE */ } if (!silent) @@ -359,19 +359,22 @@ jsonb_path_match_internal(FunctionCallInfo fcinfo, bool tz) (errcode(ERRCODE_SINGLETON_SQL_JSON_ITEM_REQUIRED), errmsg("single boolean result is expected"))); - PG_RETURN_NULL(); + if (null_on_error) + PG_RETURN_NULL(); + else + PG_RETURN_BOOL(false); } Datum jsonb_path_match(PG_FUNCTION_ARGS) { - return jsonb_path_match_internal(fcinfo, false); + return jsonb_path_match_internal(fcinfo, false, true); } Datum jsonb_path_match_tz(PG_FUNCTION_ARGS) { - return jsonb_path_match_internal(fcinfo, true); + return jsonb_path_match_internal(fcinfo, true, true); } /* @@ -383,7 +386,7 @@ Datum jsonb_path_match_opr(PG_FUNCTION_ARGS) { /* just call the other one -- it can handle both cases */ - return jsonb_path_match_internal(fcinfo, false); + return jsonb_path_match_internal(fcinfo, false, false); } /* diff --git a/src/test/regress/expected/jsonb_jsonpath.out b/src/test/regress/expected/jsonb_jsonpath.out index 83a050d..75d75cd 100644 --- a/src/test/regress/expected/jsonb_jsonpath.out +++ b/src/test/regress/expected/jsonb_jsonpath.out @@ -31,7 +31,7 @@ select jsonb '{"a": 12}' @? '$.a + 2'; select jsonb '{"a": 12}' @? '$.b + 2'; ?column? ---------- - + f (1 row) select jsonb '{"a": {"a": 12}}' @? '$.a.a'; @@ -61,7 +61,7 @@ select jsonb '{"b": {"a": 12}}' @? '$.*.b'; select jsonb '{"b": {"a": 12}}' @? 'strict $.*.b'; ?column? ---------- - + f (1 row) select jsonb '{}' @? '$.*'; @@ -115,7 +115,7 @@ select jsonb '[1]' @? '$[1]'; select jsonb '[1]' @? 'strict $[1]'; ?column? ---------- - + f (1 row) select jsonb_path_query('[1]', 'strict $[1]'); @@ -128,13 +128,13 @@ select jsonb_path_query('[1]', 'strict $[1]', silent => true); select jsonb '[1]' @? 'lax $[10000000000000000]'; ?column? ---------- - + f (1 row) select jsonb '[1]' @? 'strict $[10000000000000000]'; ?column? ---------- - + f (1 row) select jsonb_path_query('[1]', 'lax $[10000000000000000]'); @@ -174,7 +174,7 @@ select jsonb '[1]' @? '$[1.2]'; select jsonb '[1]' @? 'strict $[1.2]'; ?column? ---------- - + f (1 row) select jsonb '{"a": [1,2,3], "b": [3,4,5]}' @? '$ ? (@.a[*] > @.b[*])'; @@ -1097,13 +1097,13 @@ select jsonb '[1,"2",0,3]' @? '-$[*]'; select jsonb '["1",2,0,3]' @? 'strict -$[*]'; ?column? ---------- - + f (1 row) select jsonb '[1,"2",0,3]' @? 'strict -$[*]'; ?column? ---------- - + f (1 row) -- unwrapping of operator arguments in lax mode @@ -1175,37 +1175,37 @@ select jsonb '2' @@ '$ <= 1'; select jsonb '2' @@ '$ == "2"'; ?column? ---------- - + f (1 row) select jsonb '2' @@ '1'; ?column? ---------- - + f (1 row) select jsonb '{}' @@ '$'; ?column? ---------- - + f (1 row) select jsonb '[]' @@ '$'; ?column? ---------- - + f (1 row) select jsonb '[1,2,3]' @@ '$[*]'; ?column? ---------- - + f (1 row) select jsonb '[]' @@ '$[*]'; ?column? ---------- - + f (1 row) select jsonb_path_match('[[1, true], [2, false]]', 'strict $[*] ? (@[0] > $x) [1]', '{"x": 1}'); -- 2.7.4
>From b2072b5e1dccb2dd15ddd32b542c3b90e34b5605 Mon Sep 17 00:00:00 2001 From: Nikita Glukhov <n.glu...@postgrespro.ru> Date: Thu, 16 Apr 2020 20:18:12 +0300 Subject: [PATCH] Fix NULL operator results in matchingsel() --- src/backend/utils/adt/selfuncs.c | 88 +++++++++++++++++++++++++--------------- 1 file changed, 56 insertions(+), 32 deletions(-) diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 4fdcb07..a00b459 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -282,6 +282,30 @@ eqsel_internal(PG_FUNCTION_ARGS, bool negate) } /* + * Call binary boolean function, converting NULL result to FALSE + */ +static bool +BoolFunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2) +{ + LOCAL_FCINFO(fcinfo, 2); + Datum result; + + InitFunctionCallInfoData(*fcinfo, flinfo, 2, collation, NULL, NULL); + + fcinfo->args[0].value = arg1; + fcinfo->args[0].isnull = false; + fcinfo->args[1].value = arg2; + fcinfo->args[1].isnull = false; + + result = FunctionCallInvoke(fcinfo); + + if (fcinfo->isnull) + return false; + + return DatumGetBool(result); +} + +/* * var_eq_const --- eqsel for var = const case * * This is exported so that some other estimation functions can use it. @@ -353,15 +377,15 @@ var_eq_const(VariableStatData *vardata, Oid operator, { /* be careful to apply operator right way 'round */ if (varonleft) - match = DatumGetBool(FunctionCall2Coll(&eqproc, - sslot.stacoll, - sslot.values[i], - constval)); + match = BoolFunctionCall2Coll(&eqproc, + sslot.stacoll, + sslot.values[i], + constval); else - match = DatumGetBool(FunctionCall2Coll(&eqproc, - sslot.stacoll, - constval, - sslot.values[i])); + match = BoolFunctionCall2Coll(&eqproc, + sslot.stacoll, + constval, + sslot.values[i]); if (match) break; } @@ -726,14 +750,14 @@ mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc, for (i = 0; i < sslot.nvalues; i++) { if (varonleft ? - DatumGetBool(FunctionCall2Coll(opproc, - sslot.stacoll, - sslot.values[i], - constval)) : - DatumGetBool(FunctionCall2Coll(opproc, - sslot.stacoll, - constval, - sslot.values[i]))) + BoolFunctionCall2Coll(opproc, + sslot.stacoll, + sslot.values[i], + constval) : + BoolFunctionCall2Coll(opproc, + sslot.stacoll, + constval, + sslot.values[i])) mcv_selec += sslot.numbers[i]; sumcommon += sslot.numbers[i]; } @@ -804,14 +828,14 @@ histogram_selectivity(VariableStatData *vardata, FmgrInfo *opproc, for (i = n_skip; i < sslot.nvalues - n_skip; i++) { if (varonleft ? - DatumGetBool(FunctionCall2Coll(opproc, - sslot.stacoll, - sslot.values[i], - constval)) : - DatumGetBool(FunctionCall2Coll(opproc, - sslot.stacoll, - constval, - sslot.values[i]))) + BoolFunctionCall2Coll(opproc, + sslot.stacoll, + sslot.values[i], + constval) : + BoolFunctionCall2Coll(opproc, + sslot.stacoll, + constval, + sslot.values[i])) nmatch++; } result = ((double) nmatch) / ((double) (sslot.nvalues - 2 * n_skip)); @@ -2329,10 +2353,10 @@ eqjoinsel_inner(Oid opfuncoid, { if (hasmatch2[j]) continue; - if (DatumGetBool(FunctionCall2Coll(&eqproc, - sslot1->stacoll, - sslot1->values[i], - sslot2->values[j]))) + if (BoolFunctionCall2Coll(&eqproc, + sslot1->stacoll, + sslot1->values[i], + sslot2->values[j])) { hasmatch1[i] = hasmatch2[j] = true; matchprodfreq += sslot1->numbers[i] * sslot2->numbers[j]; @@ -2541,10 +2565,10 @@ eqjoinsel_semi(Oid opfuncoid, { if (hasmatch2[j]) continue; - if (DatumGetBool(FunctionCall2Coll(&eqproc, - sslot1->stacoll, - sslot1->values[i], - sslot2->values[j]))) + if (BoolFunctionCall2Coll(&eqproc, + sslot1->stacoll, + sslot1->values[i], + sslot2->values[j])) { hasmatch1[i] = hasmatch2[j] = true; nmatches++; -- 2.7.4