On Mon, Apr 12, 2021 at 10:07 PM James Coleman <jtc...@gmail.com> wrote: > > On Mon, Apr 12, 2021 at 7:49 PM David Rowley <dgrowle...@gmail.com> wrote: > > > > On Tue, 13 Apr 2021 at 11:42, Tom Lane <t...@sss.pgh.pa.us> wrote: > > > > > > David Rowley <dgrowle...@gmail.com> writes: > > > > I realised when working on something unrelated last night that we can > > > > also do hash lookups for NOT IN too. > > > > > > ... and still get the behavior right for nulls? > > > > Yeah, it will. There are already some special cases for NULLs in the > > IN version. Those would need to be adapted for NOT IN. > > I hadn't thought about using the negator operator directly that way > when I initially wrote the patch. > > But also I didn't think a whole lot about the NOT IN case at all -- > and there's no mention of such that I see in this thread or the > precursor thread. It's pretty obvious that it wasn't part of my > immediate need, but obviously it'd be nice to have the consistency. > > All that to say this: my vote would be to put it into PG15 also.
...and here's a draft patch. I can take this to a new thread if you'd prefer; the one here already got committed, on the other hand this is pretty strongly linked to this discussion, so I figured it made sense to post it here. James
From 08c37c5b228a0a7e9546a481a789eb1c384fcfc7 Mon Sep 17 00:00:00 2001 From: jcoleman <jtc...@gmail.com> Date: Tue, 13 Apr 2021 13:36:38 -0400 Subject: [PATCH v1] Add HashedScalarArrayOp support for NOT IN --- src/backend/optimizer/prep/prepqual.c | 1 + src/backend/optimizer/util/clauses.c | 4 +- src/backend/parser/parse_oper.c | 1 + src/include/nodes/primnodes.h | 1 + src/test/regress/expected/expressions.out | 90 +++++++++++++++++++++++ src/test/regress/sql/expressions.sql | 31 ++++++++ 6 files changed, 126 insertions(+), 2 deletions(-) diff --git a/src/backend/optimizer/prep/prepqual.c b/src/backend/optimizer/prep/prepqual.c index 42c3e4dc04..2c29993b13 100644 --- a/src/backend/optimizer/prep/prepqual.c +++ b/src/backend/optimizer/prep/prepqual.c @@ -129,6 +129,7 @@ negate_clause(Node *node) newopexpr->opfuncid = InvalidOid; newopexpr->hashfuncid = InvalidOid; newopexpr->useOr = !saopexpr->useOr; + newopexpr->isNegator = true; newopexpr->inputcollid = saopexpr->inputcollid; newopexpr->args = saopexpr->args; newopexpr->location = saopexpr->location; diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index 526997327c..99e688426e 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -2137,8 +2137,8 @@ convert_saop_to_hashed_saop_walker(Node *node, void *context) Oid lefthashfunc; Oid righthashfunc; - if (saop->useOr && arrayarg && IsA(arrayarg, Const) && - !((Const *) arrayarg)->constisnull && + if ((saop->useOr || (!saop->useOr && saop->isNegator)) && arrayarg && + IsA(arrayarg, Const) && !((Const *) arrayarg)->constisnull && get_op_hash_functions(saop->opno, &lefthashfunc, &righthashfunc) && lefthashfunc == righthashfunc) { diff --git a/src/backend/parser/parse_oper.c b/src/backend/parser/parse_oper.c index c379d72fce..222f15719a 100644 --- a/src/backend/parser/parse_oper.c +++ b/src/backend/parser/parse_oper.c @@ -896,6 +896,7 @@ make_scalar_array_op(ParseState *pstate, List *opname, result->opfuncid = opform->oprcode; result->hashfuncid = InvalidOid; result->useOr = useOr; + result->isNegator = strcmp("<>", NameStr(opform->oprname)) == 0; /* inputcollid will be set by parse_collate.c */ result->args = args; result->location = location; diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h index 9ae851d847..819856395e 100644 --- a/src/include/nodes/primnodes.h +++ b/src/include/nodes/primnodes.h @@ -592,6 +592,7 @@ typedef struct ScalarArrayOpExpr Oid opfuncid; /* PG_PROC OID of comparison function */ Oid hashfuncid; /* PG_PROC OID of hash func or InvalidOid */ bool useOr; /* true for ANY, false for ALL */ + bool isNegator; /* true if NOT has been applied to opno */ Oid inputcollid; /* OID of collation that operator should use */ List *args; /* the scalar and array operands */ int location; /* token location, or -1 if unknown */ diff --git a/src/test/regress/expected/expressions.out b/src/test/regress/expected/expressions.out index 5944dfd5e1..2e88f1ca19 100644 --- a/src/test/regress/expected/expressions.out +++ b/src/test/regress/expected/expressions.out @@ -216,6 +216,61 @@ select return_text_input('a') in ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', ' t (1 row) +-- NOT IN +select return_int_input(1) not in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1); + ?column? +---------- + f +(1 row) + +select return_int_input(1) not in (10, 9, 2, 8, 3, 7, 4, 6, 5, 0); + ?column? +---------- + t +(1 row) + +select return_int_input(1) not in (10, 9, 2, 8, 3, 7, 4, 6, 5, 2, null); + ?column? +---------- + +(1 row) + +select return_int_input(1) not in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1, null); + ?column? +---------- + f +(1 row) + +select return_int_input(1) not in (null, null, null, null, null, null, null, null, null, null, null); + ?column? +---------- + +(1 row) + +select return_int_input(null::int) not in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1); + ?column? +---------- + +(1 row) + +select return_int_input(null::int) not in (10, 9, 2, 8, 3, 7, 4, 6, 5, 0); + ?column? +---------- + +(1 row) + +select return_int_input(null::int) not in (10, 9, 2, 8, 3, 7, 4, 6, 5, null); + ?column? +---------- + +(1 row) + +select return_text_input('a') not in ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'); + ?column? +---------- + f +(1 row) + rollback; -- Test with non-strict equality function. -- We need to create our own type for this. @@ -242,6 +297,11 @@ begin end if; end; $$ language plpgsql immutable; +create function myintne(myint, myint) returns bool as $$ +begin + return not myinteq($1, $2); +end; +$$ language plpgsql immutable; create operator = ( leftarg = myint, rightarg = myint, @@ -252,6 +312,16 @@ create operator = ( join = eqjoinsel, merges ); +create operator <> ( + leftarg = myint, + rightarg = myint, + commutator = <>, + negator = =, + procedure = myintne, + restrict = eqsel, + join = eqjoinsel, + merges +); create operator class myint_ops default for type myint using hash as operator 1 = (myint, myint), @@ -266,6 +336,16 @@ select * from inttest where a in (1::myint,2::myint,3::myint,4::myint,5::myint,6 (2 rows) +select * from inttest where a not in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint, null); + a +--- +(0 rows) + +select * from inttest where a not in (0::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint, null); + a +--- +(0 rows) + -- ensure the result matched with the non-hashed version. We simply remove -- some array elements so that we don't reach the hashing threshold. select * from inttest where a in (1::myint,2::myint,3::myint,4::myint,5::myint, null); @@ -275,4 +355,14 @@ select * from inttest where a in (1::myint,2::myint,3::myint,4::myint,5::myint, (2 rows) +select * from inttest where a not in (1::myint,2::myint,3::myint,4::myint,5::myint, null); + a +--- +(0 rows) + +select * from inttest where a not in (0::myint,2::myint,3::myint,4::myint,5::myint, null); + a +--- +(0 rows) + rollback; diff --git a/src/test/regress/sql/expressions.sql b/src/test/regress/sql/expressions.sql index b3fd1b5ecb..6f9404c3be 100644 --- a/src/test/regress/sql/expressions.sql +++ b/src/test/regress/sql/expressions.sql @@ -93,6 +93,16 @@ select return_int_input(1) in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1, null); select return_int_input(null::int) in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1); select return_int_input(null::int) in (10, 9, 2, 8, 3, 7, 4, 6, 5, null); select return_text_input('a') in ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'); +-- NOT IN +select return_int_input(1) not in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1); +select return_int_input(1) not in (10, 9, 2, 8, 3, 7, 4, 6, 5, 0); +select return_int_input(1) not in (10, 9, 2, 8, 3, 7, 4, 6, 5, 2, null); +select return_int_input(1) not in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1, null); +select return_int_input(1) not in (null, null, null, null, null, null, null, null, null, null, null); +select return_int_input(null::int) not in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1); +select return_int_input(null::int) not in (10, 9, 2, 8, 3, 7, 4, 6, 5, 0); +select return_int_input(null::int) not in (10, 9, 2, 8, 3, 7, 4, 6, 5, null); +select return_text_input('a') not in ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'); rollback; @@ -124,6 +134,12 @@ begin end; $$ language plpgsql immutable; +create function myintne(myint, myint) returns bool as $$ +begin + return not myinteq($1, $2); +end; +$$ language plpgsql immutable; + create operator = ( leftarg = myint, rightarg = myint, @@ -135,6 +151,17 @@ create operator = ( merges ); +create operator <> ( + leftarg = myint, + rightarg = myint, + commutator = <>, + negator = =, + procedure = myintne, + restrict = eqsel, + join = eqjoinsel, + merges +); + create operator class myint_ops default for type myint using hash as operator 1 = (myint, myint), @@ -145,8 +172,12 @@ insert into inttest values(1::myint),(null); -- try an array with enough elements to cause hashing select * from inttest where a in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint, null); +select * from inttest where a not in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint, null); +select * from inttest where a not in (0::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint, null); -- ensure the result matched with the non-hashed version. We simply remove -- some array elements so that we don't reach the hashing threshold. select * from inttest where a in (1::myint,2::myint,3::myint,4::myint,5::myint, null); +select * from inttest where a not in (1::myint,2::myint,3::myint,4::myint,5::myint, null); +select * from inttest where a not in (0::myint,2::myint,3::myint,4::myint,5::myint, null); rollback; -- 2.17.1