Rebased version v.22.
- Added enable_self_join_removal GUC (true is default)
- The joinquals of the relation that is being removed, redistributed in
accordance with the remove_rel_from_query () machinery.
--
Andrey Lepikhov
Postgres Professional
https://postgrespro.com
The Russian Postgres Company
>From 2d194aab7f8e43805a61901de34b28fcc4e136b4 Mon Sep 17 00:00:00 2001
From: "Andrey V. Lepikhov" <a.lepik...@postgrespro.ru>
Date: Mon, 27 Jan 2020 14:51:20 +0500
Subject: [PATCH] Remove self joins v.22
---
src/backend/nodes/outfuncs.c | 19 +-
src/backend/optimizer/path/indxpath.c | 28 +-
src/backend/optimizer/path/joinpath.c | 6 +-
src/backend/optimizer/plan/analyzejoins.c | 1174 ++++++++++++++++-
src/backend/optimizer/plan/planmain.c | 7 +-
src/backend/optimizer/util/pathnode.c | 3 +-
src/backend/optimizer/util/relnode.c | 26 +-
src/backend/utils/misc/guc.c | 11 +
src/backend/utils/mmgr/aset.c | 1 -
src/include/nodes/nodes.h | 1 +
src/include/nodes/pathnodes.h | 22 +-
src/include/optimizer/pathnode.h | 4 +
src/include/optimizer/paths.h | 3 +-
src/include/optimizer/planmain.h | 7 +-
src/test/regress/expected/equivclass.out | 32 +
src/test/regress/expected/join.out | 275 ++++
src/test/regress/expected/updatable_views.out | 15 +-
src/test/regress/sql/equivclass.sql | 17 +
src/test/regress/sql/join.sql | 139 ++
19 files changed, 1683 insertions(+), 107 deletions(-)
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index d76fae44b8..7b2b2641cd 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -2278,7 +2278,8 @@ _outRelOptInfo(StringInfo str, const RelOptInfo *node)
WRITE_OID_FIELD(userid);
WRITE_BOOL_FIELD(useridiscurrent);
/* we don't try to print fdwroutine or fdw_private */
- /* can't print unique_for_rels/non_unique_for_rels; BMSes aren't Nodes */
+ WRITE_NODE_FIELD(unique_for_rels);
+ /* can't print non_unique_for_rels; BMSes aren't Nodes */
WRITE_NODE_FIELD(baserestrictinfo);
WRITE_UINT_FIELD(baserestrict_min_security);
WRITE_NODE_FIELD(joininfo);
@@ -2350,6 +2351,19 @@ _outStatisticExtInfo(StringInfo str, const StatisticExtInfo *node)
WRITE_BITMAPSET_FIELD(keys);
}
+static void
+_outUniqueRelInfo(StringInfo str, const UniqueRelInfo *node)
+{
+ WRITE_NODE_TYPE("UNIQUERELINFO");
+
+ WRITE_BITMAPSET_FIELD(outerrelids);
+ if (node->index)
+ {
+ WRITE_OID_FIELD(index->indexoid);
+ WRITE_NODE_FIELD(column_values);
+ }
+}
+
static void
_outEquivalenceClass(StringInfo str, const EquivalenceClass *node)
{
@@ -4131,6 +4145,9 @@ outNode(StringInfo str, const void *obj)
case T_StatisticExtInfo:
_outStatisticExtInfo(str, obj);
break;
+ case T_UniqueRelInfo:
+ _outUniqueRelInfo(str, obj);
+ break;
case T_ExtensibleNode:
_outExtensibleNode(str, obj);
break;
diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c
index 2a50272da6..0c6e7eee74 100644
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -3564,7 +3564,8 @@ ec_member_matches_indexcol(PlannerInfo *root, RelOptInfo *rel,
* relation_has_unique_index_for
* Determine whether the relation provably has at most one row satisfying
* a set of equality conditions, because the conditions constrain all
- * columns of some unique index.
+ * columns of some unique index. If index_info is not null, it is set to
+ * point to a new UniqueRelInfo containing the index and conditions.
*
* The conditions can be represented in either or both of two ways:
* 1. A list of RestrictInfo nodes, where the caller has already determined
@@ -3585,12 +3586,16 @@ ec_member_matches_indexcol(PlannerInfo *root, RelOptInfo *rel,
bool
relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel,
List *restrictlist,
- List *exprlist, List *oprlist)
+ List *exprlist, List *oprlist,
+ UniqueRelInfo **unique_info)
{
ListCell *ic;
Assert(list_length(exprlist) == list_length(oprlist));
+ if (unique_info)
+ *unique_info = NULL;
+
/* Short-circuit if no indexes... */
if (rel->indexlist == NIL)
return false;
@@ -3641,6 +3646,7 @@ relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel,
{
IndexOptInfo *ind = (IndexOptInfo *) lfirst(ic);
int c;
+ List *column_values = NIL;
/*
* If the index is not unique, or not immediately enforced, or if it's
@@ -3689,6 +3695,9 @@ relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel,
if (match_index_to_operand(rexpr, c, ind))
{
matched = true; /* column is unique */
+ column_values = lappend(column_values, rinfo->outer_is_left
+ ? get_leftop(rinfo->clause)
+ : get_rightop(rinfo->clause));
break;
}
}
@@ -3731,7 +3740,22 @@ relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel,
/* Matched all key columns of this index? */
if (c == ind->nkeycolumns)
+ {
+ if (unique_info)
+ {
+ /* This may be called in GEQO memory context. */
+ MemoryContext oldContext = MemoryContextSwitchTo(root->planner_cxt);
+ *unique_info = makeNode(UniqueRelInfo);
+ (*unique_info)->index = ind;
+ (*unique_info)->column_values = list_copy(column_values);
+ MemoryContextSwitchTo(oldContext);
+ }
+ if (column_values)
+ list_free(column_values);
return true;
+ }
+ if (column_values)
+ list_free(column_values);
}
return false;
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index 7d2756e234..88db5d01c8 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -176,7 +176,8 @@ add_paths_to_joinrel(PlannerInfo *root,
innerrel,
JOIN_INNER,
restrictlist,
- false);
+ false,
+ NULL /*index_info*/);
break;
default:
extra.inner_unique = innerrel_is_unique(root,
@@ -185,7 +186,8 @@ add_paths_to_joinrel(PlannerInfo *root,
innerrel,
jointype,
restrictlist,
- false);
+ false,
+ NULL /*index_info*/);
break;
}
diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c
index d0ff660284..49ae8510cb 100644
--- a/src/backend/optimizer/plan/analyzejoins.c
+++ b/src/backend/optimizer/plan/analyzejoins.c
@@ -22,6 +22,7 @@
*/
#include "postgres.h"
+#include "catalog/pg_class.h"
#include "nodes/nodeFuncs.h"
#include "optimizer/clauses.h"
#include "optimizer/joininfo.h"
@@ -29,8 +30,12 @@
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "optimizer/planmain.h"
+#include "optimizer/restrictinfo.h"
#include "optimizer/tlist.h"
#include "utils/lsyscache.h"
+#include "utils/memutils.h"
+
+bool enable_self_join_removal;
/* local functions */
static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo);
@@ -39,15 +44,16 @@ static void remove_rel_from_query(PlannerInfo *root, int relid,
static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved);
static bool rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel);
static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel,
- List *clause_list);
+ List *clause_list, UniqueRelInfo **unique_info);
static Oid distinct_col_search(int colno, List *colnos, List *opids);
static bool is_innerrel_unique_for(PlannerInfo *root,
Relids joinrelids,
Relids outerrelids,
RelOptInfo *innerrel,
JoinType jointype,
- List *restrictlist);
-
+ List *restrictlist,
+ UniqueRelInfo **unique_info);
+static void change_rinfo(RestrictInfo* rinfo, Index from, Index to);
/*
* remove_useless_joins
@@ -58,7 +64,7 @@ static bool is_innerrel_unique_for(PlannerInfo *root,
* data structures that have to be updated are accessible via "root".
*/
List *
-remove_useless_joins(PlannerInfo *root, List *joinlist)
+remove_useless_left_joins(PlannerInfo *root, List *joinlist)
{
ListCell *lc;
@@ -161,7 +167,6 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
int innerrelid;
RelOptInfo *innerrel;
Relids joinrelids;
- List *clause_list = NIL;
ListCell *l;
int attroff;
@@ -237,67 +242,24 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
}
/*
- * Search for mergejoinable clauses that constrain the inner rel against
- * either the outer rel or a pseudoconstant. If an operator is
- * mergejoinable then it behaves like equality for some btree opclass, so
- * it's what we want. The mergejoinability test also eliminates clauses
- * containing volatile functions, which we couldn't depend on.
+ * Check for pushed-down clauses referencing the inner rel. If there is
+ * such a clause then join removal has to be disallowed. We have to
+ * check this despite the previous attr_needed checks because of the
+ * possibility of pushed-down clauses referencing the rel.
*/
foreach(l, innerrel->joininfo)
{
RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
-
- /*
- * If it's not a join clause for this outer join, we can't use it.
- * Note that if the clause is pushed-down, then it is logically from
- * above the outer join, even if it references no other rels (it might
- * be from WHERE, for example).
- */
- if (RINFO_IS_PUSHED_DOWN(restrictinfo, joinrelids))
- {
- /*
- * If such a clause actually references the inner rel then join
- * removal has to be disallowed. We have to check this despite
- * the previous attr_needed checks because of the possibility of
- * pushed-down clauses referencing the rel.
- */
- if (bms_is_member(innerrelid, restrictinfo->clause_relids))
+ if (RINFO_IS_PUSHED_DOWN(restrictinfo, joinrelids)
+ && bms_is_member(innerrel->relid, restrictinfo->clause_relids))
return false;
- continue; /* else, ignore; not useful here */
- }
-
- /* Ignore if it's not a mergejoinable clause */
- if (!restrictinfo->can_join ||
- restrictinfo->mergeopfamilies == NIL)
- continue; /* not mergejoinable */
-
- /*
- * Check if clause has the form "outer op inner" or "inner op outer",
- * and if so mark which side is inner.
- */
- if (!clause_sides_match_join(restrictinfo, sjinfo->min_lefthand,
- innerrel->relids))
- continue; /* no good for these input relations */
-
- /* OK, add to list */
- clause_list = lappend(clause_list, restrictinfo);
}
- /*
- * Now that we have the relevant equality join clauses, try to prove the
- * innerrel distinct.
- */
- if (rel_is_distinct_for(root, innerrel, clause_list))
- return true;
-
- /*
- * Some day it would be nice to check for other methods of establishing
- * distinctness.
- */
- return false;
+ return is_innerrel_unique_for(root, joinrelids, sjinfo->min_lefthand,
+ innerrel, sjinfo->jointype, innerrel->joininfo,
+ NULL /*unique_index*/);
}
-
/*
* Remove the target relid from the planner's data structures, having
* determined that there is no need to include it in the query.
@@ -561,7 +523,7 @@ reduce_unique_semijoins(PlannerInfo *root)
/* Test whether the innerrel is unique for those clauses. */
if (!innerrel_is_unique(root,
joinrelids, sjinfo->min_lefthand, innerrel,
- JOIN_SEMI, restrictlist, true))
+ JOIN_SEMI, restrictlist, true, NULL /*index_info*/))
continue;
/* OK, remove the SpecialJoinInfo from the list. */
@@ -636,10 +598,17 @@ rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel)
* Note that the passed-in clause_list may be destructively modified! This
* is OK for current uses, because the clause_list is built by the caller for
* the sole purpose of passing to this function.
+ *
+ * If unique_index is not null, it is set to point to the index that guarantees
+ * uniqueness for a base relation.
*/
static bool
-rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list)
+rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list,
+ UniqueRelInfo **unique_info)
{
+ if (unique_info)
+ *unique_info = NULL;
+
/*
* We could skip a couple of tests here if we assume all callers checked
* rel_supports_distinctness first, but it doesn't seem worth taking any
@@ -654,8 +623,8 @@ rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list)
* relation_has_unique_index_for automatically adds any usable
* restriction clauses for the rel, so we needn't do that here.
*/
- if (relation_has_unique_index_for(root, rel, clause_list, NIL, NIL))
- return true;
+ return relation_has_unique_index_for(root, rel, clause_list, NIL, NIL,
+ unique_info);
}
else if (rel->rtekind == RTE_SUBQUERY)
{
@@ -959,6 +928,10 @@ distinct_col_search(int colno, List *colnos, List *opids)
* heuristic about whether to cache negative answers; it should be "true"
* if making an inquiry that is not part of the normal bottom-up join search
* sequence.
+ *
+ * If index_info_out is not null, it is set to point to a new UniqueRelInfo
+ * allocated in root memory context, that describes the index that guarantees
+ * uniqueness.
*/
bool
innerrel_is_unique(PlannerInfo *root,
@@ -967,12 +940,23 @@ innerrel_is_unique(PlannerInfo *root,
RelOptInfo *innerrel,
JoinType jointype,
List *restrictlist,
- bool force_cache)
+ bool force_cache,
+ UniqueRelInfo **unique_info_out)
{
MemoryContext old_context;
ListCell *lc;
+ UniqueRelInfo *unique_info;
+
+ if (unique_info_out)
+ *unique_info_out = NULL;
- /* Certainly can't prove uniqueness when there are no joinclauses */
+ /*
+ * It is possible to prove uniqueness even in the absence of joinclauses,
+ * just from baserestrictinfos alone. However, in these cases the inner
+ * relation returns one row at most, so join removal won't give much
+ * benefit. It seems better to save some planning time by ignoring these
+ * cases.
+ */
if (restrictlist == NIL)
return false;
@@ -992,10 +976,14 @@ innerrel_is_unique(PlannerInfo *root,
*/
foreach(lc, innerrel->unique_for_rels)
{
- Relids unique_for_rels = (Relids) lfirst(lc);
+ unique_info = (UniqueRelInfo *) lfirst(lc);
- if (bms_is_subset(unique_for_rels, outerrelids))
+ if (bms_is_subset(unique_info->outerrelids, outerrelids))
+ {
+ if (unique_info_out)
+ *unique_info_out = unique_info;
return true; /* Success! */
+ }
}
/*
@@ -1012,7 +1000,7 @@ innerrel_is_unique(PlannerInfo *root,
/* No cached information, so try to make the proof. */
if (is_innerrel_unique_for(root, joinrelids, outerrelids, innerrel,
- jointype, restrictlist))
+ jointype, restrictlist, &unique_info))
{
/*
* Cache the positive result for future probes, being sure to keep it
@@ -1025,10 +1013,15 @@ innerrel_is_unique(PlannerInfo *root,
* supersets of them anyway.
*/
old_context = MemoryContextSwitchTo(root->planner_cxt);
- innerrel->unique_for_rels = lappend(innerrel->unique_for_rels,
- bms_copy(outerrelids));
+ if (!unique_info)
+ unique_info = makeNode(UniqueRelInfo);
+ unique_info->outerrelids = bms_copy(outerrelids);
+ innerrel->unique_for_rels = lappend(innerrel->unique_for_rels, unique_info);
MemoryContextSwitchTo(old_context);
+ if (unique_info_out)
+ *unique_info_out = unique_info;
+
return true; /* Success! */
}
else
@@ -1074,11 +1067,15 @@ is_innerrel_unique_for(PlannerInfo *root,
Relids outerrelids,
RelOptInfo *innerrel,
JoinType jointype,
- List *restrictlist)
+ List *restrictlist,
+ UniqueRelInfo **unique_info)
{
List *clause_list = NIL;
ListCell *lc;
+ if (unique_info)
+ *unique_info = NULL;
+
/*
* Search for mergejoinable clauses that constrain the inner rel against
* the outer rel. If an operator is mergejoinable then it behaves like
@@ -1116,5 +1113,1042 @@ is_innerrel_unique_for(PlannerInfo *root,
}
/* Let rel_is_distinct_for() do the hard work */
- return rel_is_distinct_for(root, innerrel, clause_list);
+ return rel_is_distinct_for(root, innerrel, clause_list, unique_info);
+}
+
+typedef struct
+{
+ Index oldRelid;
+ Index newRelid;
+} ChangeVarnoContext;
+
+
+static bool
+change_varno_walker(Node *node, ChangeVarnoContext *context)
+{
+ if (node == NULL)
+ return false;
+
+ if (IsA(node, Var))
+ {
+ Var* var = (Var*)node;
+ if (var->varno == context->oldRelid)
+ {
+ var->varno = context->newRelid;
+ var->varnosyn = context->newRelid;
+ }
+ return false;
+ }
+ if (IsA(node, RestrictInfo))
+ {
+ change_rinfo((RestrictInfo*)node, context->oldRelid, context->newRelid);
+ return false;
+ }
+ return expression_tree_walker(node, change_varno_walker, context);
+}
+
+/*
+ * For all Vars in the expression that have varno = oldRelid, set
+ * varno = newRelid.
+ */
+static void
+change_varno(Expr *expr, Index oldRelid, Index newRelid)
+{
+ ChangeVarnoContext context;
+
+ context.oldRelid = oldRelid;
+ context.newRelid = newRelid;
+ change_varno_walker((Node *) expr, &context);
+}
+
+/*
+ * Substitute newId for oldId in relids.
+ */
+static void
+change_relid(Relids *relids, Index oldId, Index newId)
+{
+ if (bms_is_member(oldId, *relids))
+ *relids = bms_add_member(bms_del_member(bms_copy(*relids), oldId), newId);
+}
+
+static void
+change_rinfo(RestrictInfo* rinfo, Index from, Index to)
+{
+ bool is_req_equal =
+ (rinfo->required_relids == rinfo->clause_relids) ? true : false;
+
+ change_varno(rinfo->clause, from, to);
+ change_varno(rinfo->orclause, from, to);
+ change_relid(&rinfo->clause_relids, from, to);
+ if (is_req_equal)
+ rinfo->required_relids = rinfo->clause_relids;
+ else
+ change_relid(&rinfo->required_relids, from, to);
+ change_relid(&rinfo->left_relids, from, to);
+ change_relid(&rinfo->right_relids, from, to);
+ change_relid(&rinfo->outer_relids, from, to);
+ change_relid(&rinfo->nullable_relids, from, to);
+}
+
+/*
+ * Update EC members to point to the remaining relation instead of the removed
+ * one, removing duplicates.
+ */
+static void
+update_ec_members(EquivalenceClass *ec, Index toRemove, Index toKeep)
+{
+ int counter;
+
+ for (counter = 0; counter < list_length(ec->ec_members); )
+ {
+ ListCell *cell = list_nth_cell(ec->ec_members, counter);
+ EquivalenceMember *em = lfirst(cell);
+ int counter1;
+
+ if (!bms_is_member(toRemove, em->em_relids))
+ {
+ counter++;
+ continue;
+ }
+
+ change_relid(&em->em_relids, toRemove, toKeep);
+ /* We only process inner joins */
+ change_varno(em->em_expr, toRemove, toKeep);
+
+ /*
+ * After we switched the equivalence member to the remaining relation,
+ * check that it is not the same as the existing member, and if it
+ * is, delete it.
+ */
+ for (counter1 = 0; counter1 < list_length(ec->ec_members); counter1++)
+ {
+ EquivalenceMember *other;
+
+ if (counter1 == counter)
+ continue;
+
+ other = castNode(EquivalenceMember, list_nth(ec->ec_members, counter1));
+
+ if (equal(other->em_expr, em->em_expr))
+ break;
+ }
+
+ if (counter1 < list_length(ec->ec_members))
+ ec->ec_members = list_delete_cell(ec->ec_members, cell);
+ else
+ counter++;
+ }
+}
+
+/*
+ * Update EC sources to point to the remaining relation instead of the
+ * removed one.
+ */
+static void
+update_ec_sources(List **sources, Index toRemove, Index toKeep)
+{
+ int counter;
+ int cc=0;
+
+ for (counter = 0; counter < list_length(*sources); )
+ {
+ ListCell *cell = list_nth_cell(*sources, counter);
+ RestrictInfo *rinfo = castNode(RestrictInfo, lfirst(cell));
+ int counter1;
+
+ if (!bms_is_member(toRemove, rinfo->required_relids))
+ {
+ counter++;
+ continue;
+ }
+
+ change_varno(rinfo->clause, toRemove, toKeep);
+
+ /*
+ * After switching the clause to the remaining relation, check it for
+ * redundancy with existing ones. We don't have to check for
+ * redundancy with derived clauses, because we've just deleted them.
+ */
+ for (counter1 = 0; counter1 < list_length(*sources); counter1++)
+ {
+ RestrictInfo *other;
+
+ if (counter1 == counter)
+ continue;
+
+ other = castNode(RestrictInfo, list_nth(*sources, counter1));
+ if (equal(rinfo->clause, other->clause))
+ break;
+ }
+
+ if (counter1 < list_length(*sources))
+ {
+ *sources = list_delete_cell(*sources, cell);
+ cc++;
+ }
+ else
+ {
+ counter++;
+
+ /* We will keep this RestrictInfo, correct its relids. */
+ change_rinfo(rinfo, toRemove, toKeep);
+ }
+ }
+}
+
+/*
+ * Scratch space for the unique self join removal code.
+ */
+typedef struct
+{
+ PlannerInfo *root;
+
+ /* Temporary array for relation ids. */
+ Index *relids;
+
+ /*
+ * Array of Relids, one for each relation, indexed by relation id. Each
+ * element is a set of relation ids with which this relation has a special
+ * join.
+ */
+ Relids *special_join_rels;
+
+ /* Array of row marks indexed by relid. */
+ PlanRowMark **row_marks;
+
+ /* Bitmapset for join relids that is used to avoid reallocation. */
+ Relids joinrelids;
+} UsjScratch;
+
+
+
+/*
+ * Remove a relation after we have proven that it participates only in an
+ * unneeded unique self join.
+ *
+ * The joinclauses list is destructively changed.
+ */
+static void
+remove_self_join_rel(UsjScratch *scratch, Relids joinrelids, List *joinclauses,
+ RelOptInfo *toKeep, RelOptInfo *toRemove)
+{
+ PlannerInfo *root = scratch->root;
+ ListCell *cell;
+ int i;
+ List *joininfos = NIL;
+
+ /*
+ * Include all eclass mentions of removed relation into the eclass mentions
+ * of kept relation.
+ */
+ toKeep->eclass_indexes = bms_add_members(toRemove->eclass_indexes,
+ toKeep->eclass_indexes);
+
+ /*
+ * Transfer join and restriction clauses from the removed relation to the
+ * remaining one. We change the Vars of the clause to point to the
+ * remaining relation instead of the removed one. The clauses that require
+ * a subset of joinrelids become restriction clauses of the remaining
+ * relation, and others remain join clauses. We append them to
+ * baserestrictinfo and joininfo respectively, trying not to introduce
+ * duplicates.
+ *
+ * We also have to process the 'joinclauses' list here, because it
+ * contains EC-derived join clauses which must become filter clauses. It
+ * is not enough to just correct the ECs, because the EC-derived
+ * restrictions are generated before join removal (see
+ * generate_base_implied_equalities).
+ */
+
+ /* Replace removed relation in joininfo */
+ foreach(cell, toKeep->joininfo)
+ {
+ RestrictInfo *rinfo = lfirst_node(RestrictInfo, cell);
+
+ change_rinfo(rinfo, toRemove->relid, toKeep->relid);
+
+ /*
+ * If this clause is a mergejoinable equality clause that compares a
+ * variable to itself, i.e., has the form of "X=X", replace it with
+ * null test.
+ */
+ if (rinfo->mergeopfamilies && IsA(rinfo->clause, OpExpr))
+ {
+ Expr *leftOp, *rightOp;
+
+ leftOp = (Expr *) get_leftop(rinfo->clause);
+ rightOp = (Expr *) get_rightop(rinfo->clause);
+
+ if (leftOp != NULL && equal(leftOp, rightOp))
+ {
+ NullTest *nullTest = makeNode(NullTest);
+ nullTest->arg = leftOp;
+ nullTest->nulltesttype = IS_NOT_NULL;
+ nullTest->argisrow = false;
+ nullTest->location = -1;
+ rinfo->clause = (Expr*)nullTest;
+ }
+ }
+ }
+
+ /* Replace removed relation in joinclauses.
+ * It is similar with code above but also adds substituted null-tests to baserestrict list of kept relation. */
+ foreach(cell, joinclauses)
+ {
+ RestrictInfo *rinfo = lfirst_node(RestrictInfo, cell);
+
+ change_rinfo(rinfo, toRemove->relid, toKeep->relid);
+
+ /*
+ * If this clause is a mergejoinable equality clause that compares a
+ * variable to itself, i.e., has the form of "X=X", replace it with
+ * null test.
+ */
+ if (rinfo->mergeopfamilies && IsA(rinfo->clause, OpExpr))
+ {
+ Expr *leftOp, *rightOp;
+
+ leftOp = (Expr *) get_leftop(rinfo->clause);
+ rightOp = (Expr *) get_rightop(rinfo->clause);
+
+ if (leftOp != NULL && equal(leftOp, rightOp))
+ {
+ NullTest *nullTest = makeNode(NullTest);
+ nullTest->arg = leftOp;
+ nullTest->nulltesttype = IS_NOT_NULL;
+ nullTest->argisrow = false;
+ nullTest->location = -1;
+ rinfo->clause = (Expr*)nullTest;
+ toKeep->baserestrictinfo = lappend(toKeep->baserestrictinfo, rinfo);
+ }
+ }
+ }
+
+ /* Tranfer removed relation baserestrictinfo clauses to kept relation */
+ foreach(cell, toRemove->baserestrictinfo)
+ {
+ ListCell *otherCell;
+ RestrictInfo *rinfo = lfirst_node(RestrictInfo, cell);
+ List **target = &toKeep->baserestrictinfo;
+
+ /*
+ * Replace the references to the removed relation with references to
+ * the remaining one. We won't necessarily add this clause, because
+ * it may be already present in the joininfo or baserestrictinfo.
+ * Still, we have to switch it to point to the remaining relation.
+ * This is important for join clauses that reference both relations,
+ * because they are included in both joininfos.
+ */
+ change_rinfo(rinfo, toRemove->relid, toKeep->relid);
+
+ /*
+ * Don't add the clause if it is already present in the list, or
+ * derived from the same equivalence class, or is the same as another
+ * clause.
+ */
+ foreach (otherCell, *target)
+ {
+ RestrictInfo *other = lfirst(otherCell);
+ if (other == rinfo
+ || (rinfo->parent_ec != NULL
+ && other->parent_ec == rinfo->parent_ec)
+ || equal(rinfo->clause, other->clause))
+ {
+ break;
+ }
+ }
+
+ /* We can't have an EC-derived clause that joins to some third relation */
+ if (otherCell != NULL)
+ continue;
+
+ *target = lappend(*target, rinfo);
+ }
+
+ /* Transfer removed relation join-clauses to kept relation.
+ * It is similar with code above but also substituted redundant quals with null-tests. */
+ foreach(cell, toRemove->joininfo)
+ {
+ ListCell *otherCell;
+ RestrictInfo *rinfo = lfirst_node(RestrictInfo, cell);
+ List **target = &toKeep->joininfo;
+
+ /*
+ * Replace the references to the removed relation with references to
+ * the remaining one. We won't necessarily add this clause, because
+ * it may be already present in the joininfo or baserestrictinfo.
+ * Still, we have to switch it to point to the remaining relation.
+ * This is important for join clauses that reference both relations,
+ * because they are included in both joininfos.
+ */
+ change_rinfo(rinfo, toRemove->relid, toKeep->relid);
+
+ /*
+ * Don't add the clause if it is already present in the list, or
+ * derived from the same equivalence class, or is the same as another
+ * clause.
+ */
+ foreach (otherCell, *target)
+ {
+ RestrictInfo *other = lfirst(otherCell);
+ if (other == rinfo
+ || (rinfo->parent_ec != NULL
+ && other->parent_ec == rinfo->parent_ec)
+ || equal(rinfo->clause, other->clause))
+ {
+ break;
+ }
+ }
+
+ if (otherCell != NULL)
+ continue;
+
+ /*
+ * If this clause is a mergejoinable equality clause that compares a
+ * variable to itself, i.e., has the form of "X=X", replace it with
+ * null test.
+ */
+ if (rinfo->mergeopfamilies && IsA(rinfo->clause, OpExpr))
+ {
+ Expr *leftOp, *rightOp;
+
+ Assert(IsA(rinfo->clause, OpExpr));
+
+ leftOp = (Expr *) get_leftop(rinfo->clause);
+ rightOp = (Expr *) get_rightop(rinfo->clause);
+
+ if (leftOp != NULL && equal(leftOp, rightOp))
+ {
+ NullTest *nullTest = makeNode(NullTest);
+ nullTest->arg = leftOp;
+ nullTest->nulltesttype = IS_NOT_NULL;
+ nullTest->argisrow = false;
+ nullTest->location = -1;
+ rinfo->clause = (Expr*)nullTest;
+ }
+ }
+
+ *target = lappend(*target, rinfo);
+ toKeep->baserestrict_min_security = Min(toKeep->baserestrict_min_security, rinfo->security_level);
+ }
+
+ /*
+ * Now the state of the joininfo list of the toKeep relation is changed:
+ * some of clauses can be removed, some need to move into baserestrictinfo.
+ * To do this use the same technique as the remove_rel_from_query() routine.
+ */
+ joininfos = list_copy(toKeep->joininfo);
+ foreach(cell, joininfos)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell);
+
+ remove_join_clause_from_rels(root, rinfo, rinfo->required_relids);
+
+ if (RINFO_IS_PUSHED_DOWN(rinfo, joinrelids))
+ {
+ /* Recheck that qual doesn't actually reference the target rel */
+ Assert(!bms_is_member(toRemove->relid, rinfo->clause_relids));
+
+ /*
+ * The required_relids probably aren't shared with anything else,
+ * but let's copy them just to be sure.
+ */
+ rinfo->required_relids = bms_copy(rinfo->required_relids);
+ rinfo->required_relids = bms_del_member(rinfo->required_relids,
+ toRemove->relid);
+ distribute_restrictinfo_to_rels(root, rinfo);
+ }
+ }
+
+ /*
+ * Transfer the targetlist and attr_needed flags.
+ */
+ Assert(toRemove->reltarget->sortgrouprefs == 0);
+
+ foreach (cell, toRemove->reltarget->exprs)
+ {
+ Expr *node = lfirst(cell);
+ change_varno(node, toRemove->relid, toKeep->relid);
+ if (!list_member(toKeep->reltarget->exprs, node))
+ toKeep->reltarget->exprs = lappend(toKeep->reltarget->exprs, node);
+ }
+
+ for (i = toKeep->min_attr; i <= toKeep->max_attr; i++)
+ {
+ int attno = i - toKeep->min_attr;
+ toKeep->attr_needed[attno] = bms_add_members(toKeep->attr_needed[attno],
+ toRemove->attr_needed[attno]);
+ }
+
+ /*
+ * If the removed relation has a row mark, transfer it to the remaining
+ * one.
+ *
+ * If both rels have row marks, just keep the one corresponding to the
+ * remaining relation, because we verified earlier that they have the same
+ * strength.
+ *
+ * Also make sure that the scratch->row_marks cache is up to date, because
+ * we are going to use it for further join removals.
+ */
+ if (scratch->row_marks[toRemove->relid])
+ {
+ PlanRowMark **markToRemove = &scratch->row_marks[toRemove->relid];
+ PlanRowMark **markToKeep = &scratch->row_marks[toKeep->relid];
+
+ if (*markToKeep)
+ {
+ Assert((*markToKeep)->markType == (*markToRemove)->markType);
+
+ root->rowMarks = list_delete_ptr(root->rowMarks, *markToKeep);
+ *markToKeep = NULL;
+ }
+ else
+ {
+ *markToKeep = *markToRemove;
+ *markToRemove = NULL;
+
+ /* Shouldn't have inheritance children here. */
+ Assert((*markToKeep)->rti == (*markToKeep)->prti);
+
+ (*markToKeep)->rti = toKeep->relid;
+ (*markToKeep)->prti = toKeep->relid;
+ }
+ }
+
+ /*
+ * Likewise replace references in SpecialJoinInfo data structures.
+ *
+ * This is relevant in case the join we're deleting is nested inside some
+ * special joins: the upper joins' relid sets have to be adjusted.
+ */
+ foreach (cell, root->join_info_list)
+ {
+ SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(cell);
+
+ change_relid(&sjinfo->min_lefthand, toRemove->relid, toKeep->relid);
+ change_relid(&sjinfo->min_righthand, toRemove->relid, toKeep->relid);
+ change_relid(&sjinfo->syn_lefthand, toRemove->relid, toKeep->relid);
+ change_relid(&sjinfo->syn_righthand, toRemove->relid, toKeep->relid);
+ change_varno((Expr*)sjinfo->semi_rhs_exprs, toRemove->relid, toKeep->relid);
+ }
+
+ /*
+ * Likewise update references in PlaceHolderVar data structures.
+ */
+ foreach(cell, root->placeholder_list)
+ {
+ PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(cell);
+
+ /*
+ * We are at an inner join of two base relations. A placeholder can't
+ * be needed here or evaluated here.
+ */
+ Assert(!bms_is_subset(phinfo->ph_eval_at, joinrelids));
+ Assert(!bms_is_subset(phinfo->ph_needed, joinrelids));
+
+ change_relid(&phinfo->ph_eval_at, toRemove->relid, toKeep->relid);
+ change_relid(&phinfo->ph_needed, toRemove->relid, toKeep->relid);
+ change_relid(&phinfo->ph_lateral, toRemove->relid, toKeep->relid);
+ change_relid(&phinfo->ph_var->phrels, toRemove->relid, toKeep->relid);
+ }
+
+ /*
+ * Update the equivalence classes that reference the removed relations.
+ */
+ foreach(cell, root->eq_classes)
+ {
+ EquivalenceClass *ec = lfirst(cell);
+
+ if (!bms_is_member(toRemove->relid, ec->ec_relids))
+ {
+ /*
+ * This EC doesn't reference the removed relation, nothing to be
+ * done for it.
+ */
+ continue;
+ }
+
+ /*
+ * Update the EC members to reference the remaining relation instead
+ * of the removed one.
+ */
+ update_ec_members(ec, toRemove->relid, toKeep->relid);
+ change_relid(&ec->ec_relids, toRemove->relid, toKeep->relid);
+
+ /*
+ * We will now update source and derived clauses of the EC.
+ *
+ * Restriction clauses for base relations are already distributed to
+ * the respective baserestrictinfo lists (see
+ * generate_implied_equalities). The above code has already processed
+ * this list, and updated these clauses to reference the remaining
+ * relation, so we can skip them here based on their relids.
+ *
+ * Likewise, we have already processed the join clauses that join the
+ * removed relation to the remaining one.
+ *
+ * Finally, there are join clauses that join the removed relation to
+ * some third relation. We can't just delete the source clauses and
+ * regenerate them from the EC, because the corresponding equality
+ * operators might be missing (see the handling of ec_broken).
+ * Therefore, we will update the references in the source clauses.
+ *
+ * Derived clauses can be generated again, so it is simpler to just
+ * delete them.
+ */
+ list_free(ec->ec_derives);
+ ec->ec_derives = NULL;
+ update_ec_sources(&ec->ec_sources, toRemove->relid, toKeep->relid);
+ }
+
+ /*
+ * Mark the rel as "dead" to show it is no longer part of the join tree.
+ * (Removing it from the baserel array altogether seems too risky.)
+ */
+ toRemove->reloptkind = RELOPT_DEADREL;
+
+ /*
+ * Update references to the removed relation from other baserels.
+ */
+ for (i = 1; i < root->simple_rel_array_size; i++)
+ {
+ RelOptInfo *otherrel = root->simple_rel_array[i];
+ int attroff;
+
+ /* no point in processing target rel itself */
+ if (i == toRemove->relid)
+ continue;
+
+ /* there may be empty slots corresponding to non-baserel RTEs */
+ if (otherrel == NULL)
+ continue;
+
+ Assert(otherrel->relid == i); /* sanity check on array */
+
+ /* Update attr_needed arrays. */
+ for (attroff = otherrel->max_attr - otherrel->min_attr;
+ attroff >= 0; attroff--)
+ {
+ change_relid(&otherrel->attr_needed[attroff], toRemove->relid,
+ toKeep->relid);
+ }
+
+ /* Update lateral references. */
+ change_varno((Expr*)otherrel->lateral_vars, toRemove->relid, toKeep->relid);
+ }
+
+ /*
+ * Change varno in some special cases with non-trivial RangeTblEntry
+ */
+ foreach(cell, root->parse->rtable)
+ {
+ RangeTblEntry *rte = lfirst(cell);
+
+ switch(rte->rtekind)
+ {
+ case RTE_FUNCTION:
+ change_varno((Expr*)rte->functions, toRemove->relid, toKeep->relid);
+ break;
+ case RTE_TABLEFUNC:
+ change_varno((Expr*)rte->tablefunc, toRemove->relid, toKeep->relid);
+ break;
+ case RTE_VALUES:
+ change_varno((Expr*)rte->values_lists, toRemove->relid, toKeep->relid);
+ break;
+ default:
+ /* no op */
+ break;
+ }
+ }
+}
+
+/*
+ * Test whether the relations are joined on the same unique column.
+ */
+static bool
+is_unique_self_join(PlannerInfo *root, Relids joinrelids, RelOptInfo *outer,
+ RelOptInfo *inner, List *restrictlist)
+{
+ UniqueRelInfo *outerinfo = NULL;
+ UniqueRelInfo *innerinfo = NULL;
+ List *outerValues, *innerValues;
+
+ innerrel_is_unique(root, joinrelids, inner->relids,
+ outer, JOIN_INNER, list_concat(list_concat_unique(list_copy(outer->joininfo), restrictlist),
+ outer->baserestrictinfo), true, &outerinfo);
+ if (!outerinfo || !outerinfo->index)
+ return false;
+
+ innerrel_is_unique(root, joinrelids, outer->relids,
+ inner, JOIN_INNER, list_concat(list_concat_unique(list_copy(inner->joininfo), restrictlist),
+ inner->baserestrictinfo), true, &innerinfo);
+ if (!innerinfo || !innerinfo->index)
+ return false;
+
+ /* We must have the same unique index for both relations. */
+ if (outerinfo->index->indexoid != innerinfo->index->indexoid)
+ return false;
+
+ if (restrictlist != NULL && IsA(((RestrictInfo*)linitial(restrictlist))->clause, Const))
+ return false;
+
+ /*
+ * We have proven that for both relations, the same unique index guarantees
+ * that there is at most one row where columns equal given values. These
+ * values must be the same for both relations, or else we won't match the
+ * same row on each side of join. A value may be either Const or Var of some
+ * other relation. For the purposes of this proof, the Vars of the inner and
+ * outer relation are the same, so we replace outer varno with inner and
+ * compare the column values using equal().
+ */
+ innerValues = copyObject(innerinfo->column_values);
+ outerValues = copyObject(outerinfo->column_values);
+ change_varno((Expr *) innerValues, outer->relid, inner->relid);
+ change_varno((Expr *) outerValues, outer->relid, inner->relid);
+ if (!equal(outerValues, innerValues))
+ {
+ list_free_deep(outerValues);
+ list_free_deep(innerValues);
+ return false;
+ }
+ list_free_deep(outerValues);
+ list_free_deep(innerValues);
+
+ return true;
+}
+
+/*
+ * Find and remove unique self joins in a group of base relations that have
+ * the same Oid.
+ *
+ * Returns a set of relids that were removed.
+ */
+static Relids
+remove_self_joins_one_group(UsjScratch *scratch, Index *relids, int n)
+{
+ PlannerInfo *root = scratch->root;
+ Relids joinrelids = scratch->joinrelids;
+ Relids result = NULL;
+ int i, o;
+
+ if (n < 2)
+ return NULL;
+
+ for (o = 0; o < n; o++)
+ {
+ RelOptInfo *outer = root->simple_rel_array[relids[o]];
+
+ for (i = o + 1; i < n; i++)
+ {
+ RelOptInfo *inner = root->simple_rel_array[relids[i]];
+ List *restrictlist;
+ ListCell *cell;
+
+ /* A sanity check: the relations have the same Oid. */
+ Assert(root->simple_rte_array[relids[i]]->relid
+ == root->simple_rte_array[relids[o]]->relid);
+
+ /*
+ * This optimization applies to inner joins only, so skip any
+ * relations that form a special join.
+ */
+ if (bms_is_member(relids[i], scratch->special_join_rels[relids[o]]))
+ continue;
+
+ /* Reuse joinrelids bitset to avoid reallocation. */
+ joinrelids = bms_del_members(joinrelids, joinrelids);
+
+ /*
+ * We only deal with base rels here, so their relids bitset
+ * contains only one member -- their relid.
+ */
+ joinrelids = bms_add_member(joinrelids, relids[o]);
+ joinrelids = bms_add_member(joinrelids, relids[i]);
+
+ /* Is it a unique self join? */
+ restrictlist = build_joinrel_restrictlist(root, joinrelids, outer,
+ inner);
+ if (!is_unique_self_join(root, joinrelids, outer, inner,
+ restrictlist))
+ continue;
+
+ /*
+ * We can't remove the join if the relations have row marks of
+ * different strength (e.g. one is locked FOR UPDATE and another
+ * just has ROW_MARK_REFERENCE for EvalPlanQual rechecking).
+ */
+ if (scratch->row_marks[relids[i]] && scratch->row_marks[relids[o]]
+ && scratch->row_marks[relids[i]]->markType
+ != scratch->row_marks[relids[o]]->markType)
+ {
+ continue;
+ }
+
+ /*
+ * Be safe to do not remove table participated in complicated PH
+ */
+ foreach(cell, root->placeholder_list)
+ {
+ PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(cell);
+
+ /* there isn't any other place to eval PHV */
+ if (bms_is_subset(phinfo->ph_eval_at, joinrelids) ||
+ bms_is_subset(phinfo->ph_needed, joinrelids))
+ break;
+ }
+
+ if (cell)
+ continue;
+
+ /*
+ * We can remove either relation, so remove the outer one, to
+ * simplify this loop.
+ */
+ remove_self_join_rel(scratch, joinrelids, restrictlist, inner, outer);
+ result = bms_add_member(result, relids[o]);
+
+ /*
+ * Replace varno in root targetlist and HAVING clause.
+ */
+ change_varno((Expr *) root->processed_tlist, relids[o], relids[i]);
+ change_varno((Expr *) root->parse->havingQual, relids[o], relids[i]);
+
+ /* We removed the outer relation, try the next one. */
+ break;
+ }
+ }
+
+ scratch->joinrelids = joinrelids;
+ return result;
+}
+
+/*
+ * A qsort comparator to sort the relids by the relation Oid.
+ */
+static int
+compare_rte(const Index *left, const Index *right, PlannerInfo *root)
+{
+ Oid l = root->simple_rte_array[*left]->relid;
+ Oid r = root->simple_rte_array[*right]->relid;
+
+ return l < r ? 1 : (l == r ? 0 : -1);
+}
+
+/*
+ * Find and remove unique self joins on a particular level of the join tree.
+ *
+ * We sort the relations by Oid and then examine each group with the same Oid.
+ * If we removed any relation, remove it from joinlist as well.
+ */
+static void
+remove_self_joins_one_level(UsjScratch *scratch, List **joinlist)
+{
+ ListCell *cell;
+ int counter;
+ Relids relidsToRemove = NULL;
+ Oid groupOid;
+ int groupStart;
+ int i;
+ int n = 0;
+ Index *relid_ascending = scratch->relids;
+ PlannerInfo *root = scratch->root;
+
+ /*
+ * Collect the ids of base relations at this level of the join tree.
+ */
+ foreach (cell, *joinlist)
+ {
+ RangeTblEntry *rte;
+ RelOptInfo *rel;
+ RangeTblRef *ref = (RangeTblRef *) lfirst(cell);
+ if (!IsA(ref, RangeTblRef))
+ continue;
+
+ rte = root->simple_rte_array[ref->rtindex];
+ rel = root->simple_rel_array[ref->rtindex];
+
+ /* We only care about base relations from which we select something. */
+ if (rte->rtekind != RTE_RELATION || rte->relkind != RELKIND_RELATION
+ || rel == NULL)
+ {
+ continue;
+ }
+
+ /*
+ * This optimization won't work for tables that have inheritance
+ * children.
+ */
+ if (rte->inh)
+ continue;
+
+ relid_ascending[n++] = ref->rtindex;
+
+ /*
+ * Limit the number of joins we process to control the quadratic
+ * behavior.
+ */
+ if (n > join_collapse_limit)
+ break;
+ }
+
+ if (n < 2)
+ return;
+
+ /*
+ * Find and process the groups of relations that have same Oid.
+ */
+ qsort_arg(relid_ascending, n, sizeof(*relid_ascending),
+ (qsort_arg_comparator) compare_rte, root);
+ groupOid = root->simple_rte_array[relid_ascending[0]]->relid;
+ groupStart = 0;
+ for (i = 1; i < n; i++)
+ {
+ RangeTblEntry *rte = root->simple_rte_array[relid_ascending[i]];
+ Assert(rte->relid != InvalidOid);
+ if (rte->relid != groupOid)
+ {
+ relidsToRemove = bms_add_members(relidsToRemove,
+ remove_self_joins_one_group(scratch, &relid_ascending[groupStart],
+ i - groupStart));
+ groupOid = rte->relid;
+ groupStart = i;
+ }
+ }
+ Assert(groupOid != InvalidOid);
+ Assert(groupStart < n);
+ relidsToRemove = bms_add_members(relidsToRemove,
+ remove_self_joins_one_group(scratch, &relid_ascending[groupStart],
+ n - groupStart));
+
+ /* Delete the removed relations from joinlist. */
+ for (counter = 0; counter < list_length(*joinlist); )
+ {
+ Node *node;
+
+ cell = list_nth_cell(*joinlist, counter);
+ node = (Node *) lfirst(cell);
+
+ if (IsA(node, RangeTblRef)
+ && bms_is_member(((RangeTblRef *) node)->rtindex, relidsToRemove))
+ {
+ *joinlist = list_delete_cell(*joinlist, cell);
+ }
+ else
+ counter++;
+ }
+}
+
+/*
+ * Find and remove unique self joins on a single level of a join tree, and
+ * recurse to handle deeper levels.
+ */
+static void
+remove_self_joins_recurse(UsjScratch *scratch, List **joinlist)
+{
+ ListCell *lc;
+ foreach (lc, *joinlist)
+ {
+ switch (((Node *) lfirst(lc))->type)
+ {
+ case T_List:
+ remove_self_joins_recurse(scratch, (List **) &lfirst(lc));
+ break;
+ case T_RangeTblRef:
+ break;
+ default:
+ Assert(false);
+ }
+ }
+ remove_self_joins_one_level(scratch, joinlist);
+}
+
+/*
+ * Find and remove useless self joins.
+ *
+ * We search for joins where the same relation is joined to itself on all
+ * columns of some unique index. If this condition holds, then, for
+ * each outer row, only one inner row matches, and it is the same row
+ * of the same relation. This allows us to remove the join and replace
+ * it with a scan that combines WHERE clauses from both sides. The join
+ * clauses themselves assume the form of X = X and can be replaced with
+ * NOT NULL clauses.
+ *
+ * For the sake of simplicity, we don't apply this optimization to special
+ * joins. Here is a list of what we could do in some particular cases:
+ * 'a a1 semi join a a2': is reduced to inner by reduce_unique_semijoins,
+ * and then removed normally.
+ * 'a a1 anti join a a2': could simplify to a scan with 'outer quals AND
+ * (IS NULL on join columns OR NOT inner quals)'.
+ * 'a a1 left join a a2': could simplify to a scan like inner, but without
+ * NOT NULL condtions on join columns.
+ * 'a a1 left join (a a2 join b)': can't simplify this, because join to b
+ * can both remove rows and introduce duplicates.
+ *
+ * To search for removable joins, we order all the relations on their Oid,
+ * go over each set with the same Oid, and consider each pair of relations
+ * in this set. We check that both relation are made unique by the same
+ * unique index with the same clauses.
+ *
+ * To remove the join, we mark one of the participating relation as
+ * dead, and rewrite all references to it to point to the remaining
+ * relation. This includes modifying RestrictInfos, EquivalenceClasses and
+ * EquivalenceMembers. We also have to modify the row marks. The join clauses
+ * of the removed relation become either restriction or join clauses, based on
+ * whether they reference any relations not participating in the removed join.
+ *
+ * 'targetlist' is the top-level targetlist of query. If it has any references
+ * to the removed relations, we update them to point to the remaining ones.
+ */
+void
+remove_useless_self_joins(PlannerInfo *root, List **joinlist)
+{
+ ListCell *lc;
+ UsjScratch scratch;
+
+ if (!enable_self_join_removal)
+ return;
+
+ scratch.root = root;
+ scratch.relids = palloc(root->simple_rel_array_size * sizeof(Index));
+ scratch.special_join_rels = palloc0(root->simple_rel_array_size * sizeof(Relids));
+ scratch.row_marks = palloc0(root->simple_rel_array_size * sizeof(PlanRowMark *));
+ scratch.joinrelids = NULL;
+
+ /* Find out which relations have special joins to which. */
+ foreach(lc, root->join_info_list)
+ {
+ SpecialJoinInfo *info = (SpecialJoinInfo *) lfirst(lc);
+ int bit = -1;
+ while ((bit = bms_next_member(info->syn_lefthand, bit)) >= 0)
+ {
+ RelOptInfo *rel = find_base_rel(root, bit);
+ scratch.special_join_rels[rel->relid] =
+ bms_add_members(scratch.special_join_rels[rel->relid],
+ info->syn_righthand);
+ }
+
+ bit = -1;
+ while ((bit = bms_next_member(info->syn_righthand, bit)) >= 0)
+ {
+ RelOptInfo *rel = find_base_rel(root, bit);
+ scratch.special_join_rels[rel->relid] =
+ bms_add_members(scratch.special_join_rels[rel->relid],
+ info->syn_lefthand);
+ }
+ }
+
+ /* Collect row marks. */
+ foreach (lc, root->rowMarks)
+ {
+ PlanRowMark *rowMark = (PlanRowMark *) lfirst(lc);
+
+ /* Can't have more than one row mark for a relation. */
+ Assert(scratch.row_marks[rowMark->rti] == NULL);
+
+ scratch.row_marks[rowMark->rti] = rowMark;
+ }
+
+ /* Finally, remove the joins. */
+ remove_self_joins_recurse(&scratch, joinlist);
}
diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c
index 62dfc6d44a..416478f981 100644
--- a/src/backend/optimizer/plan/planmain.c
+++ b/src/backend/optimizer/plan/planmain.c
@@ -218,7 +218,7 @@ query_planner(PlannerInfo *root,
* jointree preprocessing, but the necessary information isn't available
* until we've built baserel data structures and classified qual clauses.
*/
- joinlist = remove_useless_joins(root, joinlist);
+ joinlist = remove_useless_left_joins(root, joinlist);
/*
* Also, reduce any semijoins with unique inner rels to plain inner joins.
@@ -226,6 +226,11 @@ query_planner(PlannerInfo *root,
*/
reduce_unique_semijoins(root);
+ /*
+ * Remove self joins on a unique column.
+ */
+ remove_useless_self_joins(root, &joinlist);
+
/*
* Now distribute "placeholders" to base rels as needed. This has to be
* done after join removal because removal could change whether a
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index e6d08aede5..0ccd3f99af 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -1597,7 +1597,8 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
if (rel->rtekind == RTE_RELATION && sjinfo->semi_can_btree &&
relation_has_unique_index_for(root, rel, NIL,
sjinfo->semi_rhs_exprs,
- sjinfo->semi_operators))
+ sjinfo->semi_operators,
+ NULL /*index_info*/))
{
pathnode->umethod = UNIQUE_PATH_NOOP;
pathnode->path.rows = rel->rows;
diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c
index 374f93890b..e1452b3e6a 100644
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -39,14 +39,10 @@ typedef struct JoinHashEntry
static void build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel,
RelOptInfo *input_rel);
-static List *build_joinrel_restrictlist(PlannerInfo *root,
- RelOptInfo *joinrel,
- RelOptInfo *outer_rel,
- RelOptInfo *inner_rel);
static void build_joinrel_joinlist(RelOptInfo *joinrel,
RelOptInfo *outer_rel,
RelOptInfo *inner_rel);
-static List *subbuild_joinrel_restrictlist(RelOptInfo *joinrel,
+static List *subbuild_joinrel_restrictlist(Relids joinrelids,
List *joininfo_list,
List *new_restrictlist);
static List *subbuild_joinrel_joinlist(RelOptInfo *joinrel,
@@ -589,7 +585,7 @@ build_join_rel(PlannerInfo *root,
*/
if (restrictlist_ptr)
*restrictlist_ptr = build_joinrel_restrictlist(root,
- joinrel,
+ joinrel->relids,
outer_rel,
inner_rel);
return joinrel;
@@ -693,7 +689,7 @@ build_join_rel(PlannerInfo *root,
* caller might or might not need the restrictlist, but I need it anyway
* for set_joinrel_size_estimates().)
*/
- restrictlist = build_joinrel_restrictlist(root, joinrel,
+ restrictlist = build_joinrel_restrictlist(root, joinrel->relids,
outer_rel, inner_rel);
if (restrictlist_ptr)
*restrictlist_ptr = restrictlist;
@@ -1026,7 +1022,7 @@ build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel,
* the various joinlist entries ultimately refer to RestrictInfos
* pushed into them by distribute_restrictinfo_to_rels().
*
- * 'joinrel' is a join relation node
+ * 'joinrelids' is a join relation id set
* 'outer_rel' and 'inner_rel' are a pair of relations that can be joined
* to form joinrel.
*
@@ -1039,9 +1035,9 @@ build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel,
* RestrictInfo nodes are no longer context-dependent. Instead, just include
* the original nodes in the lists made for the join relation.
*/
-static List *
+List *
build_joinrel_restrictlist(PlannerInfo *root,
- RelOptInfo *joinrel,
+ Relids joinrelids,
RelOptInfo *outer_rel,
RelOptInfo *inner_rel)
{
@@ -1052,8 +1048,8 @@ build_joinrel_restrictlist(PlannerInfo *root,
* eliminating any duplicates (important since we will see many of the
* same clauses arriving from both input relations).
*/
- result = subbuild_joinrel_restrictlist(joinrel, outer_rel->joininfo, NIL);
- result = subbuild_joinrel_restrictlist(joinrel, inner_rel->joininfo, result);
+ result = subbuild_joinrel_restrictlist(joinrelids, outer_rel->joininfo, NIL);
+ result = subbuild_joinrel_restrictlist(joinrelids, inner_rel->joininfo, result);
/*
* Add on any clauses derived from EquivalenceClasses. These cannot be
@@ -1062,7 +1058,7 @@ build_joinrel_restrictlist(PlannerInfo *root,
*/
result = list_concat(result,
generate_join_implied_equalities(root,
- joinrel->relids,
+ joinrelids,
outer_rel->relids,
inner_rel));
@@ -1088,7 +1084,7 @@ build_joinrel_joinlist(RelOptInfo *joinrel,
}
static List *
-subbuild_joinrel_restrictlist(RelOptInfo *joinrel,
+subbuild_joinrel_restrictlist(Relids joinrelids,
List *joininfo_list,
List *new_restrictlist)
{
@@ -1098,7 +1094,7 @@ subbuild_joinrel_restrictlist(RelOptInfo *joinrel,
{
RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
- if (bms_is_subset(rinfo->required_relids, joinrel->relids))
+ if (bms_is_subset(rinfo->required_relids, joinrelids))
{
/*
* This clause becomes a restriction clause for the joinrel, since
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index cacbe904db..20aa037f18 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -125,6 +125,7 @@ extern char *temp_tablespaces;
extern bool ignore_checksum_failure;
extern bool ignore_invalid_pages;
extern bool synchronize_seqscans;
+extern bool enable_self_join_removal;
#ifdef TRACE_SYNCSCAN
extern bool trace_syncscan;
@@ -1065,6 +1066,16 @@ static struct config_bool ConfigureNamesBool[] =
true,
NULL, NULL, NULL
},
+ {
+ {"enable_self_join_removal", PGC_USERSET, QUERY_TUNING_METHOD,
+ gettext_noop("Enable removal of unique self-joins."),
+ NULL,
+ GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE
+ },
+ &enable_self_join_removal,
+ true,
+ NULL, NULL, NULL
+ },
{
{"geqo", PGC_USERSET, QUERY_TUNING_GEQO,
gettext_noop("Enables genetic query optimization."),
diff --git a/src/backend/utils/mmgr/aset.c b/src/backend/utils/mmgr/aset.c
index c0623f106d..48685db2ba 100644
--- a/src/backend/utils/mmgr/aset.c
+++ b/src/backend/utils/mmgr/aset.c
@@ -1485,7 +1485,6 @@ AllocSetCheck(MemoryContext context)
chsize = chunk->size; /* aligned chunk size */
dsize = chunk->requested_size; /* real data */
-
/*
* Check chunk size
*/
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index baced7eec0..b0bcc266cb 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -272,6 +272,7 @@ typedef enum NodeTag
T_RollupData,
T_GroupingSetData,
T_StatisticExtInfo,
+ T_UniqueRelInfo,
/*
* TAGS FOR MEMORY NODES (memnodes.h)
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 3d3be197e0..417dbdb2b9 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -536,8 +536,11 @@ typedef struct PartitionSchemeData *PartitionScheme;
* populate these fields, for base rels; but someday they might be used for
* join rels too:
*
- * unique_for_rels - list of Relid sets, each one being a set of other
- * rels for which this one has been proven unique
+ * unique_for_rels - list of UniqueRelInfos, each of them recording
+ * a set of other rels for which this one has been proven
+ * unique. If this is a baserel that is made unique by an
+ * index, UniqueRelInfo also stores the information about
+ * that index.
* non_unique_for_rels - list of Relid sets, each one being a set of
* other rels for which we have tried and failed to prove
* this one unique
@@ -2509,4 +2512,19 @@ typedef struct JoinCostWorkspace
double inner_rows_total;
} JoinCostWorkspace;
+/*
+ * UniqueRelInfo records a fact that a relation is unique when being joined
+ * to other relation(s) specified by outerrelids. If the uniqueness is
+ * guaranteed by a unique index, this index is also saved. The values that
+ * constrain index columns, be it Vars of outer relations or Consts, are saved
+ * to column_values list.
+ */
+typedef struct UniqueRelInfo
+{
+ NodeTag tag;
+ Relids outerrelids;
+ IndexOptInfo *index;
+ List *column_values;
+} UniqueRelInfo;
+
#endif /* PATHNODES_H */
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h
index e450fe112a..0b503e2cf4 100644
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -288,6 +288,10 @@ extern RelOptInfo *build_join_rel(PlannerInfo *root,
RelOptInfo *inner_rel,
SpecialJoinInfo *sjinfo,
List **restrictlist_ptr);
+extern List *build_joinrel_restrictlist(PlannerInfo *root,
+ Relids joinrelids,
+ RelOptInfo *outer_rel,
+ RelOptInfo *inner_rel);
extern Relids min_join_parameterization(PlannerInfo *root,
Relids joinrelids,
RelOptInfo *outer_rel,
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 9ab73bd20c..f3c6204e0a 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -72,7 +72,8 @@ extern void debug_print_rel(PlannerInfo *root, RelOptInfo *rel);
extern void create_index_paths(PlannerInfo *root, RelOptInfo *rel);
extern bool relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel,
List *restrictlist,
- List *exprlist, List *oprlist);
+ List *exprlist, List *oprlist,
+ UniqueRelInfo **info);
extern bool indexcol_is_bool_constant_for_query(IndexOptInfo *index,
int indexcol);
extern bool match_index_to_operand(Node *operand, int indexcol,
diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h
index eab486a621..cedbe3f05d 100644
--- a/src/include/optimizer/planmain.h
+++ b/src/include/optimizer/planmain.h
@@ -16,6 +16,7 @@
#include "nodes/pathnodes.h"
#include "nodes/plannodes.h"
+#include "optimizer/paths.h"
/* GUC parameters */
#define DEFAULT_CURSOR_TUPLE_FRACTION 0.1
@@ -96,13 +97,15 @@ extern void match_foreign_keys_to_quals(PlannerInfo *root);
/*
* prototypes for plan/analyzejoins.c
*/
-extern List *remove_useless_joins(PlannerInfo *root, List *joinlist);
+extern List *remove_useless_left_joins(PlannerInfo *root, List *joinlist);
extern void reduce_unique_semijoins(PlannerInfo *root);
extern bool query_supports_distinctness(Query *query);
extern bool query_is_distinct_for(Query *query, List *colnos, List *opids);
extern bool innerrel_is_unique(PlannerInfo *root,
Relids joinrelids, Relids outerrelids, RelOptInfo *innerrel,
- JoinType jointype, List *restrictlist, bool force_cache);
+ JoinType jointype, List *restrictlist, bool force_cache,
+ UniqueRelInfo **unique_info);
+extern void remove_useless_self_joins(PlannerInfo *root, List **jointree);
/*
* prototypes for plan/setrefs.c
diff --git a/src/test/regress/expected/equivclass.out b/src/test/regress/expected/equivclass.out
index c448d85dec..a57905f3ab 100644
--- a/src/test/regress/expected/equivclass.out
+++ b/src/test/regress/expected/equivclass.out
@@ -439,3 +439,35 @@ explain (costs off)
Filter: ((unique1 = unique1) OR (unique2 = unique2))
(2 rows)
+-- Test that broken ECs are processed correctly during self join removal.
+-- Disable merge joins so that we don't get an error about missing commutator.
+-- Test both orientations of the join clause, because only one of them breaks
+-- the EC.
+set enable_mergejoin to off;
+explain (costs off)
+ select * from ec0 m join ec0 n on m.ff = n.ff
+ join ec1 p on m.ff + n.ff = p.f1;
+ QUERY PLAN
+----------------------------------------
+ Nested Loop
+ Join Filter: ((n.ff + n.ff) = p.f1)
+ -> Seq Scan on ec1 p
+ -> Materialize
+ -> Seq Scan on ec0 n
+ Filter: (ff IS NOT NULL)
+(6 rows)
+
+explain (costs off)
+ select * from ec0 m join ec0 n on m.ff = n.ff
+ join ec1 p on p.f1::int8 = (m.ff + n.ff)::int8alias1;
+ QUERY PLAN
+---------------------------------------------------------------
+ Nested Loop
+ Join Filter: ((p.f1)::bigint = ((n.ff + n.ff))::int8alias1)
+ -> Seq Scan on ec1 p
+ -> Materialize
+ -> Seq Scan on ec0 n
+ Filter: (ff IS NOT NULL)
+(6 rows)
+
+reset enable_mergejoin;
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 761376b007..e51961d264 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -4673,6 +4673,281 @@ select * from
----+----+----+----
(0 rows)
+--
+-- test that semi- or inner self-joins on a unique column are removed
+--
+-- enable only nestloop to get more predictable plans
+set enable_hashjoin to off;
+set enable_mergejoin to off;
+create table sj (a int unique, b int);
+insert into sj values (1, null), (null, 2), (2, 1);
+analyze sj;
+select p.* from sj p, sj q where q.a = p.a and q.b = q.a - 1;
+ a | b
+---+---
+ 2 | 1
+(1 row)
+
+explain (costs off)
+select p.* from sj p, sj q where q.a = p.a and q.b = q.a - 1;
+ QUERY PLAN
+-----------------------------------------------
+ Seq Scan on sj q
+ Filter: ((a IS NOT NULL) AND (b = (a - 1)))
+(2 rows)
+
+explain (costs off)
+select * from sj p
+where exists (select * from sj q
+ where q.a = p.a and q.b < 10);
+ QUERY PLAN
+------------------------------------------
+ Seq Scan on sj q
+ Filter: ((a IS NOT NULL) AND (b < 10))
+(2 rows)
+
+-- Double self-join removal.
+-- Use a condition on "b + 1", not on "b", for the second join, so that
+-- the equivalence class is different from the first one, and we can
+-- test the non-ec code path.
+explain (costs off)
+select * from sj t1 join sj t2 on t1.a = t2.a and t1.b = t2.b
+ join sj t3 on t2.a = t3.a and t2.b + 1 = t3.b + 1;
+ QUERY PLAN
+---------------------------------------------------------------------------
+ Seq Scan on sj t3
+ Filter: ((a IS NOT NULL) AND (b IS NOT NULL) AND ((b + 1) IS NOT NULL))
+(2 rows)
+
+-- subselect that references the removed relation
+explain (costs off)
+select t1.a, (select a from sj where a = t2.a and a = t1.a)
+from sj t1, sj t2
+where t1.a = t2.a;
+ QUERY PLAN
+------------------------------------------
+ Seq Scan on sj t2
+ Filter: (a IS NOT NULL)
+ SubPlan 1
+ -> Result
+ One-Time Filter: (t2.a = t2.a)
+ -> Seq Scan on sj
+ Filter: (a = t2.a)
+(7 rows)
+
+-- self-join under outer join
+explain (costs off)
+select * from sj x join sj y on x.a = y.a
+left join int8_tbl z on x.a = z.q1;
+ QUERY PLAN
+------------------------------------
+ Nested Loop Left Join
+ Join Filter: (y.a = z.q1)
+ -> Seq Scan on sj y
+ Filter: (a IS NOT NULL)
+ -> Materialize
+ -> Seq Scan on int8_tbl z
+(6 rows)
+
+explain (costs off)
+select * from sj x join sj y on x.a = y.a
+left join int8_tbl z on y.a = z.q1;
+ QUERY PLAN
+------------------------------------
+ Nested Loop Left Join
+ Join Filter: (y.a = z.q1)
+ -> Seq Scan on sj y
+ Filter: (a IS NOT NULL)
+ -> Materialize
+ -> Seq Scan on int8_tbl z
+(6 rows)
+
+-- Test that placeholders are updated correctly after join removal
+explain (costs off)
+select * from (values (1)) x
+left join (select coalesce(y.q1, 1) from int8_tbl y
+ right join sj j1 inner join sj j2 on j1.a = j2.a
+ on true) z
+on true;
+ QUERY PLAN
+------------------------------------------
+ Nested Loop Left Join
+ -> Result
+ -> Nested Loop Left Join
+ -> Seq Scan on sj j2
+ Filter: (a IS NOT NULL)
+ -> Materialize
+ -> Seq Scan on int8_tbl y
+(7 rows)
+
+-- Test that OR predicated are updated correctly after join removal
+CREATE TABLE tab_with_flag ( id INT PRIMARY KEY, is_flag SMALLINT);
+CREATE INDEX idx_test_is_flag ON tab_with_flag (is_flag);
+explain (costs off)
+SELECT COUNT(*) FROM tab_with_flag WHERE (is_flag IS NULL OR is_flag = 0) AND id IN (SELECT id FROM tab_with_flag WHERE id IN (2, 3));
+ QUERY PLAN
+----------------------------------------------------------------------------------
+ Aggregate
+ -> Bitmap Heap Scan on tab_with_flag
+ Recheck Cond: ((id = ANY ('{2,3}'::integer[])) AND (id IS NOT NULL))
+ Filter: ((is_flag IS NULL) OR (is_flag = 0))
+ -> Bitmap Index Scan on tab_with_flag_pkey
+ Index Cond: ((id = ANY ('{2,3}'::integer[])) AND (id IS NOT NULL))
+(6 rows)
+
+DROP TABLE tab_with_flag;
+-- HAVING clause
+explain (costs off)
+select p.b from sj p join sj q on p.a = q.a group by p.b having sum(p.a) = 1;
+ QUERY PLAN
+---------------------------------
+ HashAggregate
+ Group Key: q.b
+ Filter: (sum(q.a) = 1)
+ -> Seq Scan on sj q
+ Filter: (a IS NOT NULL)
+(5 rows)
+
+-- update lateral references and range table entry reference
+explain (verbose, costs off)
+select 1 from (select x.* from sj x, sj y where x.a = y.a) q,
+ lateral generate_series(1, q.a) gs(i);
+ QUERY PLAN
+------------------------------------------------------
+ Nested Loop
+ Output: 1
+ -> Seq Scan on public.sj y
+ Output: y.a, y.b
+ Filter: (y.a IS NOT NULL)
+ -> Function Scan on pg_catalog.generate_series gs
+ Output: gs.i
+ Function Call: generate_series(1, y.a)
+(8 rows)
+
+explain (verbose, costs off)
+select 1 from (select y.* from sj x, sj y where x.a = y.a) q,
+ lateral generate_series(1, q.a) gs(i);
+ QUERY PLAN
+------------------------------------------------------
+ Nested Loop
+ Output: 1
+ -> Seq Scan on public.sj y
+ Output: y.a, y.b
+ Filter: (y.a IS NOT NULL)
+ -> Function Scan on pg_catalog.generate_series gs
+ Output: gs.i
+ Function Call: generate_series(1, y.a)
+(8 rows)
+
+-- Test that a non-EC-derived join clause is processed correctly. Use an
+-- outer join so that we can't form an EC.
+explain (costs off) select * from sj p join sj q on p.a = q.a
+ left join sj r on p.a + q.a = r.a;
+ QUERY PLAN
+------------------------------------
+ Nested Loop Left Join
+ Join Filter: ((q.a + q.a) = r.a)
+ -> Seq Scan on sj q
+ Filter: (a IS NOT NULL)
+ -> Materialize
+ -> Seq Scan on sj r
+(6 rows)
+
+-- FIXME this constant false filter doesn't look good. Should we merge
+-- equivalence classes?
+explain (costs off)
+select * from sj p, sj q where p.a = q.a and p.b = 1 and q.b = 2;
+ QUERY PLAN
+-----------------------------------------------------
+ Seq Scan on sj q
+ Filter: ((a IS NOT NULL) AND (b = 2) AND (b = 1))
+(2 rows)
+
+-- Check that attr_needed is updated correctly after self-join removal. In this
+-- test, the join of j1 with j2 is removed. k1.b is required at either j1 or j2.
+-- If this info is lost, join targetlist for (k1, k2) will not contain k1.b.
+-- Use index scan for k1 so that we don't get 'b' from physical tlist used for
+-- seqscan. Also disable reordering of joins because this test depends on a
+-- particular join tree.
+create table sk (a int, b int);
+create index on sk(a);
+set join_collapse_limit to 1;
+set enable_seqscan to off;
+explain (costs off) select 1 from
+ (sk k1 join sk k2 on k1.a = k2.a)
+ join (sj j1 join sj j2 on j1.a = j2.a) on j1.b = k1.b;
+ QUERY PLAN
+-----------------------------------------------------
+ Nested Loop
+ Join Filter: (k1.b = j2.b)
+ -> Nested Loop
+ -> Index Scan using sk_a_idx on sk k1
+ -> Index Only Scan using sk_a_idx on sk k2
+ Index Cond: (a = k1.a)
+ -> Materialize
+ -> Index Scan using sj_a_key on sj j2
+ Index Cond: (a IS NOT NULL)
+(9 rows)
+
+explain (costs off) select 1 from
+ (sk k1 join sk k2 on k1.a = k2.a)
+ join (sj j1 join sj j2 on j1.a = j2.a) on j2.b = k1.b;
+ QUERY PLAN
+-----------------------------------------------------
+ Nested Loop
+ Join Filter: (k1.b = j2.b)
+ -> Nested Loop
+ -> Index Scan using sk_a_idx on sk k1
+ -> Index Only Scan using sk_a_idx on sk k2
+ Index Cond: (a = k1.a)
+ -> Materialize
+ -> Index Scan using sj_a_key on sj j2
+ Index Cond: (a IS NOT NULL)
+(9 rows)
+
+reset join_collapse_limit;
+reset enable_seqscan;
+-- Check that clauses from the join filter list is not lost on the self-join removal
+CREATE TABLE emp1 ( id SERIAL PRIMARY KEY NOT NULL, code int);
+explain (verbose, costs off)
+SELECT * FROM emp1 e1, emp1 e2 WHERE e1.id = e2.id AND e2.code <> e1.code;
+ QUERY PLAN
+----------------------------------------------------------
+ Seq Scan on public.emp1 e2
+ Output: e2.id, e2.code, e2.id, e2.code
+ Filter: ((e2.id IS NOT NULL) AND (e2.code <> e2.code))
+(3 rows)
+
+--
+---- It's not clear yet what to do in some of the cases we discussed
+---- on the list, so they are disabled for now.
+--
+---- We need an index on two columns for the next couple of tests.
+--create table sl(a int, b int);
+--insert into sl values (1, 1), (1, 2), (2, 1);
+--create unique index on sl(a, b);
+--vacuum analyze sl;
+--
+---- Both sides are unique, but base quals are different
+--explain (costs off)
+--select * from sl t1, sl t2
+--where t1.a = t2.a and t1.b = 1 and t2.b = 2;
+--
+---- Only one side is unqiue
+--select * from sl t1, sl t2 where t1.a = t2.a and t1.b = 1;
+--select * from sl t1, sl t2 where t1.a = t2.a and t2.b = 1;
+--
+---- Several uniques indexes match, and we select a different one
+---- for each side, so the join is not removed
+--create table sm(a int unique, b int unique, c int unique);
+--explain (costs off)
+--select * from sm m, sm n where m.a = n.b and m.c = n.c;
+--explain (costs off)
+--select * from sm m, sm n where m.a = n.c and m.b = n.b;
+--explain (costs off)
+--select * from sm m, sm n where m.c = n.b and m.a = n.a;
+reset enable_hashjoin;
+reset enable_mergejoin;
--
-- Test hints given on incorrect column references are useful
--
diff --git a/src/test/regress/expected/updatable_views.out b/src/test/regress/expected/updatable_views.out
index 5de53f2782..97682cd2cc 100644
--- a/src/test/regress/expected/updatable_views.out
+++ b/src/test/regress/expected/updatable_views.out
@@ -2214,16 +2214,13 @@ SELECT * FROM rw_view1;
(1 row)
EXPLAIN (costs off) DELETE FROM rw_view1 WHERE id = 1 AND snoop(data);
- QUERY PLAN
--------------------------------------------------------------------
+ QUERY PLAN
+--------------------------------------------------
Update on base_tbl base_tbl_1
- -> Nested Loop
- -> Index Scan using base_tbl_pkey on base_tbl base_tbl_1
- Index Cond: (id = 1)
- -> Index Scan using base_tbl_pkey on base_tbl
- Index Cond: (id = 1)
- Filter: ((NOT deleted) AND snoop(data))
-(7 rows)
+ -> Index Scan using base_tbl_pkey on base_tbl
+ Index Cond: (id = 1)
+ Filter: ((NOT deleted) AND snoop(data))
+(4 rows)
DELETE FROM rw_view1 WHERE id = 1 AND snoop(data);
NOTICE: snooped value: Row 1
diff --git a/src/test/regress/sql/equivclass.sql b/src/test/regress/sql/equivclass.sql
index 85aa65de39..b1e248313a 100644
--- a/src/test/regress/sql/equivclass.sql
+++ b/src/test/regress/sql/equivclass.sql
@@ -262,3 +262,20 @@ explain (costs off)
-- this could be converted, but isn't at present
explain (costs off)
select * from tenk1 where unique1 = unique1 or unique2 = unique2;
+
+
+-- Test that broken ECs are processed correctly during self join removal.
+-- Disable merge joins so that we don't get an error about missing commutator.
+-- Test both orientations of the join clause, because only one of them breaks
+-- the EC.
+set enable_mergejoin to off;
+
+explain (costs off)
+ select * from ec0 m join ec0 n on m.ff = n.ff
+ join ec1 p on m.ff + n.ff = p.f1;
+
+explain (costs off)
+ select * from ec0 m join ec0 n on m.ff = n.ff
+ join ec1 p on p.f1::int8 = (m.ff + n.ff)::int8alias1;
+
+reset enable_mergejoin;
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index 5fc6617369..1c97635acd 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -1655,6 +1655,145 @@ select * from
select * from
int8_tbl x join (int4_tbl x cross join int4_tbl y(ff)) j on q1 = f1; -- ok
+--
+-- test that semi- or inner self-joins on a unique column are removed
+--
+
+-- enable only nestloop to get more predictable plans
+set enable_hashjoin to off;
+set enable_mergejoin to off;
+
+create table sj (a int unique, b int);
+insert into sj values (1, null), (null, 2), (2, 1);
+analyze sj;
+
+select p.* from sj p, sj q where q.a = p.a and q.b = q.a - 1;
+
+explain (costs off)
+select p.* from sj p, sj q where q.a = p.a and q.b = q.a - 1;
+
+explain (costs off)
+select * from sj p
+where exists (select * from sj q
+ where q.a = p.a and q.b < 10);
+
+-- Double self-join removal.
+-- Use a condition on "b + 1", not on "b", for the second join, so that
+-- the equivalence class is different from the first one, and we can
+-- test the non-ec code path.
+explain (costs off)
+select * from sj t1 join sj t2 on t1.a = t2.a and t1.b = t2.b
+ join sj t3 on t2.a = t3.a and t2.b + 1 = t3.b + 1;
+
+-- subselect that references the removed relation
+explain (costs off)
+select t1.a, (select a from sj where a = t2.a and a = t1.a)
+from sj t1, sj t2
+where t1.a = t2.a;
+
+-- self-join under outer join
+explain (costs off)
+select * from sj x join sj y on x.a = y.a
+left join int8_tbl z on x.a = z.q1;
+
+explain (costs off)
+select * from sj x join sj y on x.a = y.a
+left join int8_tbl z on y.a = z.q1;
+
+-- Test that placeholders are updated correctly after join removal
+explain (costs off)
+select * from (values (1)) x
+left join (select coalesce(y.q1, 1) from int8_tbl y
+ right join sj j1 inner join sj j2 on j1.a = j2.a
+ on true) z
+on true;
+
+-- Test that OR predicated are updated correctly after join removal
+CREATE TABLE tab_with_flag ( id INT PRIMARY KEY, is_flag SMALLINT);
+CREATE INDEX idx_test_is_flag ON tab_with_flag (is_flag);
+explain (costs off)
+SELECT COUNT(*) FROM tab_with_flag WHERE (is_flag IS NULL OR is_flag = 0) AND id IN (SELECT id FROM tab_with_flag WHERE id IN (2, 3));
+DROP TABLE tab_with_flag;
+
+-- HAVING clause
+explain (costs off)
+select p.b from sj p join sj q on p.a = q.a group by p.b having sum(p.a) = 1;
+
+-- update lateral references and range table entry reference
+explain (verbose, costs off)
+select 1 from (select x.* from sj x, sj y where x.a = y.a) q,
+ lateral generate_series(1, q.a) gs(i);
+
+explain (verbose, costs off)
+select 1 from (select y.* from sj x, sj y where x.a = y.a) q,
+ lateral generate_series(1, q.a) gs(i);
+
+-- Test that a non-EC-derived join clause is processed correctly. Use an
+-- outer join so that we can't form an EC.
+explain (costs off) select * from sj p join sj q on p.a = q.a
+ left join sj r on p.a + q.a = r.a;
+
+-- FIXME this constant false filter doesn't look good. Should we merge
+-- equivalence classes?
+explain (costs off)
+select * from sj p, sj q where p.a = q.a and p.b = 1 and q.b = 2;
+
+-- Check that attr_needed is updated correctly after self-join removal. In this
+-- test, the join of j1 with j2 is removed. k1.b is required at either j1 or j2.
+-- If this info is lost, join targetlist for (k1, k2) will not contain k1.b.
+-- Use index scan for k1 so that we don't get 'b' from physical tlist used for
+-- seqscan. Also disable reordering of joins because this test depends on a
+-- particular join tree.
+create table sk (a int, b int);
+create index on sk(a);
+set join_collapse_limit to 1;
+set enable_seqscan to off;
+explain (costs off) select 1 from
+ (sk k1 join sk k2 on k1.a = k2.a)
+ join (sj j1 join sj j2 on j1.a = j2.a) on j1.b = k1.b;
+explain (costs off) select 1 from
+ (sk k1 join sk k2 on k1.a = k2.a)
+ join (sj j1 join sj j2 on j1.a = j2.a) on j2.b = k1.b;
+reset join_collapse_limit;
+reset enable_seqscan;
+
+-- Check that clauses from the join filter list is not lost on the self-join removal
+CREATE TABLE emp1 ( id SERIAL PRIMARY KEY NOT NULL, code int);
+explain (verbose, costs off)
+SELECT * FROM emp1 e1, emp1 e2 WHERE e1.id = e2.id AND e2.code <> e1.code;
+
+--
+---- It's not clear yet what to do in some of the cases we discussed
+---- on the list, so they are disabled for now.
+--
+---- We need an index on two columns for the next couple of tests.
+--create table sl(a int, b int);
+--insert into sl values (1, 1), (1, 2), (2, 1);
+--create unique index on sl(a, b);
+--vacuum analyze sl;
+--
+---- Both sides are unique, but base quals are different
+--explain (costs off)
+--select * from sl t1, sl t2
+--where t1.a = t2.a and t1.b = 1 and t2.b = 2;
+--
+---- Only one side is unqiue
+--select * from sl t1, sl t2 where t1.a = t2.a and t1.b = 1;
+--select * from sl t1, sl t2 where t1.a = t2.a and t2.b = 1;
+--
+---- Several uniques indexes match, and we select a different one
+---- for each side, so the join is not removed
+--create table sm(a int unique, b int unique, c int unique);
+--explain (costs off)
+--select * from sm m, sm n where m.a = n.b and m.c = n.c;
+--explain (costs off)
+--select * from sm m, sm n where m.a = n.c and m.b = n.b;
+--explain (costs off)
+--select * from sm m, sm n where m.c = n.b and m.a = n.a;
+
+reset enable_hashjoin;
+reset enable_mergejoin;
+
--
-- Test hints given on incorrect column references are useful
--
--
2.17.1