On 09.04.2018 20:05, Teodor Sigaev wrote:
Hi!

12 years ago I proposed patch to which could "union" OR clauses into one range clause if it's possible. In that time pgsql could not use IS NULL as index clause, so patch doesn't support that

https://www.postgresql.org/message-id/flat/45742C51.9020602%40sigaev.ru

option number 4), all other are already committed.

It seems to be slightly different optimization.
Attached please find small patch which extends simplify_and_arguments in clauses.c to eliminated redundant checks. It doesn't perform complete constrains propagation and not using predicate_implied_by/predicate_refuted_by because them seems to be too expensive and essentially increase query optimization time. Instead of it it just strict match comparison of predicates with some extra logic for handling negators.

With this patch constructed query plans are optimal:

postgres=# create table foo(x integer primary key, y integer);
CREATE TABLE
postgres=# insert into foo (x) values (generate_series(1,100000));
INSERT 0 100000
postgres=# insert into foo (x,y) values (generate_series(100001,200000), 1);
INSERT 0 100000
postgres=# vacuum analyze foo;
VACUUM
postgres=# explain select * from foo where not (x < 99999 and y is not null) and (x <= 100001 and y is not null);
                             QUERY PLAN
--------------------------------------------------------------------
 Index Scan using foo_pkey on foo  (cost=0.42..8.48 rows=2 width=8)
   Index Cond: ((x <= 100001) AND (x >= 99999))
   Filter: (y IS NOT NULL)
(3 rows)

postgres=# explain select * from foo where  x <= 100001 and y is not null and not (x < 99999 and y is not null);
                             QUERY PLAN
--------------------------------------------------------------------
 Index Scan using foo_pkey on foo  (cost=0.42..8.48 rows=2 width=8)
   Index Cond: ((x <= 100001) AND (x >= 99999))
   Filter: (y IS NOT NULL)
(3 rows)





Konstantin Knizhnik wrote:
Hi hackers,

Postgres optimizer is not able to build efficient execution plan for the following query:

explain select * from  people_raw where not ("ID"<2068113880 AND "INN" is not null) and "ID"<=2068629726 AND "INN" is not null;
                                          QUERY PLAN
--------------------------------------------------------------------------------------------   Bitmap Heap Scan on people_raw (cost=74937803.72..210449640.49 rows=121521030 width=336)
    Recheck Cond: ("ID" <= 2068629726)
    Filter: (("INN" IS NOT NULL) AND (("ID" >= 2068113880) OR ("INN" IS NULL)))     ->  Bitmap Index Scan on "People_pkey" (cost=0.00..74907423.47 rows=2077021718 width=0)
          Index Cond: ("ID" <= 2068629726)
(5 rows)


Here the table is very large, but query effects only relatively small number of rows located in the range: [2068113880,2068629726]
But unfortunately optimizer doesn't take it into the account.
Moreover, using "is not null" and "not null" is both operands of AND is not smart:       (("INN" IS NOT NULL) AND (("ID" >= 2068113880) OR ("INN" IS NULL)))

If I remove "is not null" condition, then plan is perfect:

explain select * from  people_raw where not ("ID"<2068113880) and "ID"<=2068629726;
                                          QUERY PLAN
--------------------------------------------------------------------------------------------   Index Scan using "People_pkey" on people_raw (cost=0.58..196745.57 rows=586160 width=336)
    Index Cond: (("ID" >= 2068113880) AND ("ID" <= 2068629726))
(2 rows)

Before starting  investigation of the problem, I will like to know opinion and may be some advise of people familiar with optimizer:
how difficult will be to handle this case and where to look.

Thanks in advance,



--
Konstantin Knizhnik
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company

diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c
index ed6b680..488d0e6 100644
--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@@ -322,6 +322,133 @@ and_clause(Node *clause)
 }
 
 /*
+ * Returns t iff its argument is an 'true' constant
+ */
+static bool
+is_true(Node *clause)
+{
+	return IsA(clause, Const) &&
+		!((Const *) clause)->constisnull &&
+		DatumGetBool(((Const *) clause)->constvalue);
+}
+
+/*
+ * Returns t iff its argument is an 'not' clause: (NOT { expr }).
+ */
+static bool
+is_not(Node *clause)
+{
+	return IsA(clause, BoolExpr) &&
+		((BoolExpr *) clause)->boolop == NOT_EXPR;
+}
+
+static bool is_negator(Node *c1, Node *c2);
+
+
+/*
+ * Returns t iff two expressions are the same or one of them is 'not' clause is it's argument is negator of other expression
+ */
+static bool
+is_equal(Node *c1, Node *c2)
+{
+	if (equal(c1, c2))
+		return true;
+
+	if ((is_not(c1) && is_negator(linitial(((BoolExpr*)c1)->args), c2)) ||
+		(is_not(c2) && is_negator(linitial(((BoolExpr*)c2)->args), c1)))
+		return true;
+
+	return false;
+}
+
+/*
+ * Returns t iff two expressions are negators of each other
+ */
+static bool
+is_negator(Node *c1, Node *c2)
+{
+	if ((is_not(c1) && is_equal(linitial(((BoolExpr*)c1)->args), c2)) ||
+		(is_not(c2) && is_equal(linitial(((BoolExpr*)c2)->args), c1)))
+		return true;
+
+	if (IsA(c1, NullTest) && IsA(c2, NullTest))
+		return (((NullTest*)c1)->nulltesttype == IS_NOT_NULL && ((NullTest*)c2)->nulltesttype == IS_NULL)
+			|| (((NullTest*)c2)->nulltesttype == IS_NOT_NULL && ((NullTest*)c1)->nulltesttype == IS_NULL);
+
+	return false;
+}
+
+/*
+ * Exclude from expression predicates with matches with one of conjuncts
+ */
+static Node*
+exclude_redundand_checks(Node* clause, List* conjuncts)
+{
+	if (IsA(clause, BoolExpr))
+	{
+		ListCell *c1, *c2;
+		BoolExpr* be = (BoolExpr*)clause;
+		List* new_args = NULL;
+		foreach (c1, be->args)
+		{
+			Node* p1 = (Node*)lfirst(c1);
+			foreach (c2, conjuncts)
+			{
+				Node* p2 = (Node*)lfirst(c2);
+				if (is_equal(p1, p2))
+				{
+					switch (be->boolop)
+					{
+					  case OR_EXPR:
+						return makeBoolConst(true, false); /* one of disjuncts is always true, so OR clause is redundant */
+					  case NOT_EXPR:
+						return makeBoolConst(false, false); /* conjunct inversion => false */
+					  case AND_EXPR:
+						goto nextArg; /* just skip this disjunct */
+					  default:
+						break;
+					}
+				}
+				if (is_negator(p1, p2))
+				{
+					switch (be->boolop)
+					{
+					  case AND_EXPR:
+						return makeBoolConst(false, false); /* one of conjuncts is always false, so  clause is redundant */
+					  case NOT_EXPR:
+						return makeBoolConst(true, false); /* inversion of conjunct's negator => true */
+					  case OR_EXPR:
+						goto nextArg; /* just skip this conjunct */
+					  default:
+						break;
+					}
+				}
+			}
+			new_args = lappend(new_args, exclude_redundand_checks(p1, conjuncts));
+		  nextArg:;
+		}
+		if (new_args == NULL)
+			/* empty arguments list corresponds to true for 'and' and to false for 'or' operators */
+			return makeBoolConst(be->boolop == AND_EXPR, false);
+
+		switch (be->boolop)
+		{
+		  case AND_EXPR:
+			return list_length(new_args) == 1
+				? (Expr *) linitial(new_args)
+				: make_andclause(new_args);
+		  case OR_EXPR:
+			return list_length(new_args) == 1
+				? (Expr *) linitial(new_args)
+				: make_orclause(new_args);
+		  default:
+			break;
+		}
+	}
+	return clause;
+}
+
+/*
  * make_andclause
  *
  * Creates an 'and' clause given a list of its subclauses.
@@ -334,6 +461,7 @@ make_andclause(List *andclauses)
 	expr->boolop = AND_EXPR;
 	expr->args = andclauses;
 	expr->location = -1;
+
 	return (Expr *) expr;
 }
 
@@ -3852,10 +3980,21 @@ simplify_and_arguments(List *args,
 					   bool *haveNull, bool *forceFalse)
 {
 	List	   *newargs = NIL;
-	List	   *unprocessed_args;
+	List	   *unprocessed_args = NULL;
+	List       *complex_args = NULL;
+	ListCell   *c;
+
+	/* Copy list and place complex arguments to the tail to make it possible to simplify them using simple arguments */
+	foreach (c, args)
+	{
+		Node *expr = lfirst(c);
+		if (IsA(expr, BoolExpr) && ((BoolExpr*)expr)->boolop != AND_EXPR)
+			complex_args = lappend(complex_args, expr);
+		else
+			unprocessed_args = lappend(unprocessed_args, expr);
+	}
+	unprocessed_args = list_concat(unprocessed_args, complex_args);
 
-	/* See comments in simplify_or_arguments */
-	unprocessed_args = list_copy(args);
 	while (unprocessed_args)
 	{
 		Node	   *arg = (Node *) linitial(unprocessed_args);
@@ -3921,9 +4060,11 @@ simplify_and_arguments(List *args,
 			/* otherwise, we can drop the constant-true input */
 			continue;
 		}
+		arg = exclude_redundand_checks(arg, newargs);
 
 		/* else emit the simplified arg into the result list */
-		newargs = lappend(newargs, arg);
+		if (!is_true(arg))
+			newargs = lappend(newargs, arg);
 	}
 
 	return newargs;
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 84c6e9b..7aa50a0 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -3206,7 +3206,7 @@ select * from tenk1 a join tenk1 b on
                                                       QUERY PLAN                                                      
 ----------------------------------------------------------------------------------------------------------------------
  Nested Loop
-   Join Filter: (((a.unique1 = 1) AND (b.unique1 = 2)) OR (((a.unique2 = 3) OR (a.unique2 = 7)) AND (b.hundred = 4)))
+   Join Filter: (((a.unique1 = 1) AND (b.unique1 = 2)) OR ((b.hundred = 4) AND ((a.unique2 = 3) OR (a.unique2 = 7))))
    ->  Bitmap Heap Scan on tenk1 b
          Recheck Cond: ((unique1 = 2) OR (hundred = 4))
          ->  BitmapOr

Reply via email to