On Mon, Apr 12, 2021 at 10:07 PM James Coleman <jtc...@gmail.com> wrote:
>
> On Mon, Apr 12, 2021 at 7:49 PM David Rowley <dgrowle...@gmail.com> wrote:
> >
> > On Tue, 13 Apr 2021 at 11:42, Tom Lane <t...@sss.pgh.pa.us> wrote:
> > >
> > > David Rowley <dgrowle...@gmail.com> writes:
> > > > I realised when working on something unrelated last night that we can
> > > > also do hash lookups for NOT IN too.
> > >
> > > ... and still get the behavior right for nulls?
> >
> > Yeah, it will. There are already some special cases for NULLs in the
> > IN version. Those would need to be adapted for NOT IN.
>
> I hadn't thought about using the negator operator directly that way
> when I initially wrote the patch.
>
> But also I didn't think a whole lot about the NOT IN case at all --
> and there's no mention of such that I see in this thread or the
> precursor thread. It's pretty obvious that it wasn't part of my
> immediate need, but obviously it'd be nice to have the consistency.
>
> All that to say this: my vote would be to put it into PG15 also.

...and here's a draft patch. I can take this to a new thread if you'd
prefer; the one here already got committed, on the other hand this is
pretty strongly linked to this discussion, so I figured it made sense
to post it here.

James
From 08c37c5b228a0a7e9546a481a789eb1c384fcfc7 Mon Sep 17 00:00:00 2001
From: jcoleman <jtc...@gmail.com>
Date: Tue, 13 Apr 2021 13:36:38 -0400
Subject: [PATCH v1] Add HashedScalarArrayOp support for NOT IN

---
 src/backend/optimizer/prep/prepqual.c     |  1 +
 src/backend/optimizer/util/clauses.c      |  4 +-
 src/backend/parser/parse_oper.c           |  1 +
 src/include/nodes/primnodes.h             |  1 +
 src/test/regress/expected/expressions.out | 90 +++++++++++++++++++++++
 src/test/regress/sql/expressions.sql      | 31 ++++++++
 6 files changed, 126 insertions(+), 2 deletions(-)

diff --git a/src/backend/optimizer/prep/prepqual.c b/src/backend/optimizer/prep/prepqual.c
index 42c3e4dc04..2c29993b13 100644
--- a/src/backend/optimizer/prep/prepqual.c
+++ b/src/backend/optimizer/prep/prepqual.c
@@ -129,6 +129,7 @@ negate_clause(Node *node)
 					newopexpr->opfuncid = InvalidOid;
 					newopexpr->hashfuncid = InvalidOid;
 					newopexpr->useOr = !saopexpr->useOr;
+					newopexpr->isNegator = true;
 					newopexpr->inputcollid = saopexpr->inputcollid;
 					newopexpr->args = saopexpr->args;
 					newopexpr->location = saopexpr->location;
diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c
index 526997327c..99e688426e 100644
--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@@ -2137,8 +2137,8 @@ convert_saop_to_hashed_saop_walker(Node *node, void *context)
 		Oid			lefthashfunc;
 		Oid			righthashfunc;
 
-		if (saop->useOr && arrayarg && IsA(arrayarg, Const) &&
-			!((Const *) arrayarg)->constisnull &&
+		if ((saop->useOr || (!saop->useOr && saop->isNegator)) && arrayarg &&
+			IsA(arrayarg, Const) && !((Const *) arrayarg)->constisnull &&
 			get_op_hash_functions(saop->opno, &lefthashfunc, &righthashfunc) &&
 			lefthashfunc == righthashfunc)
 		{
diff --git a/src/backend/parser/parse_oper.c b/src/backend/parser/parse_oper.c
index c379d72fce..222f15719a 100644
--- a/src/backend/parser/parse_oper.c
+++ b/src/backend/parser/parse_oper.c
@@ -896,6 +896,7 @@ make_scalar_array_op(ParseState *pstate, List *opname,
 	result->opfuncid = opform->oprcode;
 	result->hashfuncid = InvalidOid;
 	result->useOr = useOr;
+	result->isNegator = strcmp("<>", NameStr(opform->oprname)) == 0;
 	/* inputcollid will be set by parse_collate.c */
 	result->args = args;
 	result->location = location;
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index 9ae851d847..819856395e 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -592,6 +592,7 @@ typedef struct ScalarArrayOpExpr
 	Oid			opfuncid;		/* PG_PROC OID of comparison function */
 	Oid			hashfuncid;		/* PG_PROC OID of hash func or InvalidOid */
 	bool		useOr;			/* true for ANY, false for ALL */
+	bool		isNegator;		/* true if NOT has been applied to opno */
 	Oid			inputcollid;	/* OID of collation that operator should use */
 	List	   *args;			/* the scalar and array operands */
 	int			location;		/* token location, or -1 if unknown */
diff --git a/src/test/regress/expected/expressions.out b/src/test/regress/expected/expressions.out
index 5944dfd5e1..2e88f1ca19 100644
--- a/src/test/regress/expected/expressions.out
+++ b/src/test/regress/expected/expressions.out
@@ -216,6 +216,61 @@ select return_text_input('a') in ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', '
  t
 (1 row)
 
+-- NOT IN
+select return_int_input(1) not in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1);
+ ?column? 
+----------
+ f
+(1 row)
+
+select return_int_input(1) not in (10, 9, 2, 8, 3, 7, 4, 6, 5, 0);
+ ?column? 
+----------
+ t
+(1 row)
+
+select return_int_input(1) not in (10, 9, 2, 8, 3, 7, 4, 6, 5, 2, null);
+ ?column? 
+----------
+ 
+(1 row)
+
+select return_int_input(1) not in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1, null);
+ ?column? 
+----------
+ f
+(1 row)
+
+select return_int_input(1) not in (null, null, null, null, null, null, null, null, null, null, null);
+ ?column? 
+----------
+ 
+(1 row)
+
+select return_int_input(null::int) not in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1);
+ ?column? 
+----------
+ 
+(1 row)
+
+select return_int_input(null::int) not in (10, 9, 2, 8, 3, 7, 4, 6, 5, 0);
+ ?column? 
+----------
+ 
+(1 row)
+
+select return_int_input(null::int) not in (10, 9, 2, 8, 3, 7, 4, 6, 5, null);
+ ?column? 
+----------
+ 
+(1 row)
+
+select return_text_input('a') not in ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j');
+ ?column? 
+----------
+ f
+(1 row)
+
 rollback;
 -- Test with non-strict equality function.
 -- We need to create our own type for this.
@@ -242,6 +297,11 @@ begin
   end if;
 end;
 $$ language plpgsql immutable;
+create function myintne(myint, myint) returns bool as $$
+begin
+  return not myinteq($1, $2);
+end;
+$$ language plpgsql immutable;
 create operator = (
   leftarg    = myint,
   rightarg   = myint,
@@ -252,6 +312,16 @@ create operator = (
   join       = eqjoinsel,
   merges
 );
+create operator <> (
+  leftarg    = myint,
+  rightarg   = myint,
+  commutator = <>,
+  negator    = =,
+  procedure  = myintne,
+  restrict   = eqsel,
+  join       = eqjoinsel,
+  merges
+);
 create operator class myint_ops
 default for type myint using hash as
   operator    1   =  (myint, myint),
@@ -266,6 +336,16 @@ select * from inttest where a in (1::myint,2::myint,3::myint,4::myint,5::myint,6
  
 (2 rows)
 
+select * from inttest where a not in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint, null);
+ a 
+---
+(0 rows)
+
+select * from inttest where a not in (0::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint, null);
+ a 
+---
+(0 rows)
+
 -- ensure the result matched with the non-hashed version.  We simply remove
 -- some array elements so that we don't reach the hashing threshold.
 select * from inttest where a in (1::myint,2::myint,3::myint,4::myint,5::myint, null);
@@ -275,4 +355,14 @@ select * from inttest where a in (1::myint,2::myint,3::myint,4::myint,5::myint,
  
 (2 rows)
 
+select * from inttest where a not in (1::myint,2::myint,3::myint,4::myint,5::myint, null);
+ a 
+---
+(0 rows)
+
+select * from inttest where a not in (0::myint,2::myint,3::myint,4::myint,5::myint, null);
+ a 
+---
+(0 rows)
+
 rollback;
diff --git a/src/test/regress/sql/expressions.sql b/src/test/regress/sql/expressions.sql
index b3fd1b5ecb..6f9404c3be 100644
--- a/src/test/regress/sql/expressions.sql
+++ b/src/test/regress/sql/expressions.sql
@@ -93,6 +93,16 @@ select return_int_input(1) in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1, null);
 select return_int_input(null::int) in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1);
 select return_int_input(null::int) in (10, 9, 2, 8, 3, 7, 4, 6, 5, null);
 select return_text_input('a') in ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j');
+-- NOT IN
+select return_int_input(1) not in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1);
+select return_int_input(1) not in (10, 9, 2, 8, 3, 7, 4, 6, 5, 0);
+select return_int_input(1) not in (10, 9, 2, 8, 3, 7, 4, 6, 5, 2, null);
+select return_int_input(1) not in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1, null);
+select return_int_input(1) not in (null, null, null, null, null, null, null, null, null, null, null);
+select return_int_input(null::int) not in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1);
+select return_int_input(null::int) not in (10, 9, 2, 8, 3, 7, 4, 6, 5, 0);
+select return_int_input(null::int) not in (10, 9, 2, 8, 3, 7, 4, 6, 5, null);
+select return_text_input('a') not in ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j');
 
 rollback;
 
@@ -124,6 +134,12 @@ begin
 end;
 $$ language plpgsql immutable;
 
+create function myintne(myint, myint) returns bool as $$
+begin
+  return not myinteq($1, $2);
+end;
+$$ language plpgsql immutable;
+
 create operator = (
   leftarg    = myint,
   rightarg   = myint,
@@ -135,6 +151,17 @@ create operator = (
   merges
 );
 
+create operator <> (
+  leftarg    = myint,
+  rightarg   = myint,
+  commutator = <>,
+  negator    = =,
+  procedure  = myintne,
+  restrict   = eqsel,
+  join       = eqjoinsel,
+  merges
+);
+
 create operator class myint_ops
 default for type myint using hash as
   operator    1   =  (myint, myint),
@@ -145,8 +172,12 @@ insert into inttest values(1::myint),(null);
 
 -- try an array with enough elements to cause hashing
 select * from inttest where a in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint, null);
+select * from inttest where a not in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint, null);
+select * from inttest where a not in (0::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint, null);
 -- ensure the result matched with the non-hashed version.  We simply remove
 -- some array elements so that we don't reach the hashing threshold.
 select * from inttest where a in (1::myint,2::myint,3::myint,4::myint,5::myint, null);
+select * from inttest where a not in (1::myint,2::myint,3::myint,4::myint,5::myint, null);
+select * from inttest where a not in (0::myint,2::myint,3::myint,4::myint,5::myint, null);
 
 rollback;
-- 
2.17.1

Reply via email to