Some more bug fixes in this patch.
-- Konstantin Knizhnik Postgres Professional: http://www.postgrespro.com The Russian Postgres Company
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index b0dcd02..dc8cb9c 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -2274,7 +2274,8 @@ _outRelOptInfo(StringInfo str, const RelOptInfo *node) WRITE_OID_FIELD(userid); WRITE_BOOL_FIELD(useridiscurrent); /* we don't try to print fdwroutine or fdw_private */ - /* can't print unique_for_rels/non_unique_for_rels; BMSes aren't Nodes */ + WRITE_NODE_FIELD(unique_for_rels); + /* can't print non_unique_for_rels; BMSes aren't Nodes */ WRITE_NODE_FIELD(baserestrictinfo); WRITE_UINT_FIELD(baserestrict_min_security); WRITE_NODE_FIELD(joininfo); @@ -2347,6 +2348,19 @@ _outStatisticExtInfo(StringInfo str, const StatisticExtInfo *node) } static void +_outUniqueRelInfo(StringInfo str, const UniqueRelInfo *node) +{ + WRITE_NODE_TYPE("UNIQUERELINFO"); + + WRITE_BITMAPSET_FIELD(outerrelids); + if (node->index) + { + WRITE_OID_FIELD(index->indexoid); + WRITE_NODE_FIELD(column_values); + } +} + +static void _outEquivalenceClass(StringInfo str, const EquivalenceClass *node) { /* @@ -4122,6 +4136,9 @@ outNode(StringInfo str, const void *obj) case T_StatisticExtInfo: _outStatisticExtInfo(str, obj); break; + case T_UniqueRelInfo: + _outUniqueRelInfo(str, obj); + break; case T_ExtensibleNode: _outExtensibleNode(str, obj); break; diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index 37b257c..3d0d03b 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -3564,7 +3564,8 @@ ec_member_matches_indexcol(PlannerInfo *root, RelOptInfo *rel, * relation_has_unique_index_for * Determine whether the relation provably has at most one row satisfying * a set of equality conditions, because the conditions constrain all - * columns of some unique index. + * columns of some unique index. If index_info is not null, it is set to + * point to a new UniqueRelInfo containing the index and conditions. * * The conditions can be represented in either or both of two ways: * 1. A list of RestrictInfo nodes, where the caller has already determined @@ -3585,12 +3586,16 @@ ec_member_matches_indexcol(PlannerInfo *root, RelOptInfo *rel, bool relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel, List *restrictlist, - List *exprlist, List *oprlist) + List *exprlist, List *oprlist, + UniqueRelInfo **unique_info) { ListCell *ic; Assert(list_length(exprlist) == list_length(oprlist)); + if (unique_info) + *unique_info = NULL; + /* Short-circuit if no indexes... */ if (rel->indexlist == NIL) return false; @@ -3641,6 +3646,7 @@ relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel, { IndexOptInfo *ind = (IndexOptInfo *) lfirst(ic); int c; + List *column_values = NIL; /* * If the index is not unique, or not immediately enforced, or if it's @@ -3689,6 +3695,9 @@ relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel, if (match_index_to_operand(rexpr, c, ind)) { matched = true; /* column is unique */ + column_values = lappend(column_values, rinfo->outer_is_left + ? get_leftop(rinfo->clause) + : get_rightop(rinfo->clause)); break; } } @@ -3731,7 +3740,22 @@ relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel, /* Matched all key columns of this index? */ if (c == ind->nkeycolumns) + { + if (unique_info) + { + /* This may be called in GEQO memory context. */ + MemoryContext oldContext = MemoryContextSwitchTo(root->planner_cxt); + *unique_info = makeNode(UniqueRelInfo); + (*unique_info)->index = ind; + (*unique_info)->column_values = list_copy(column_values); + MemoryContextSwitchTo(oldContext); + } + if (column_values) + list_free(column_values); return true; + } + if (column_values) + list_free(column_values); } return false; diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index dc28b56..450d2d4 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -176,7 +176,8 @@ add_paths_to_joinrel(PlannerInfo *root, innerrel, JOIN_INNER, restrictlist, - false); + false, + NULL /*index_info*/); break; default: extra.inner_unique = innerrel_is_unique(root, @@ -185,7 +186,8 @@ add_paths_to_joinrel(PlannerInfo *root, innerrel, jointype, restrictlist, - false); + false, + NULL /*index_info*/); break; } diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c index d19ff41..39aef73 100644 --- a/src/backend/optimizer/plan/analyzejoins.c +++ b/src/backend/optimizer/plan/analyzejoins.c @@ -22,6 +22,7 @@ */ #include "postgres.h" +#include "catalog/pg_class.h" #include "nodes/nodeFuncs.h" #include "optimizer/clauses.h" #include "optimizer/joininfo.h" @@ -29,8 +30,10 @@ #include "optimizer/pathnode.h" #include "optimizer/paths.h" #include "optimizer/planmain.h" +#include "optimizer/restrictinfo.h" #include "optimizer/tlist.h" #include "utils/lsyscache.h" +#include "utils/memutils.h" /* local functions */ static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo); @@ -39,15 +42,16 @@ static void remove_rel_from_query(PlannerInfo *root, int relid, static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved); static bool rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel); static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, - List *clause_list); + List *clause_list, UniqueRelInfo **unique_info); static Oid distinct_col_search(int colno, List *colnos, List *opids); static bool is_innerrel_unique_for(PlannerInfo *root, Relids joinrelids, Relids outerrelids, RelOptInfo *innerrel, JoinType jointype, - List *restrictlist); - + List *restrictlist, + UniqueRelInfo **unique_info); +static void change_rinfo(RestrictInfo* rinfo, Index from, Index to); /* * remove_useless_joins @@ -58,7 +62,7 @@ static bool is_innerrel_unique_for(PlannerInfo *root, * data structures that have to be updated are accessible via "root". */ List * -remove_useless_joins(PlannerInfo *root, List *joinlist) +remove_useless_left_joins(PlannerInfo *root, List *joinlist) { ListCell *lc; @@ -161,7 +165,6 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) int innerrelid; RelOptInfo *innerrel; Relids joinrelids; - List *clause_list = NIL; ListCell *l; int attroff; @@ -237,67 +240,24 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) } /* - * Search for mergejoinable clauses that constrain the inner rel against - * either the outer rel or a pseudoconstant. If an operator is - * mergejoinable then it behaves like equality for some btree opclass, so - * it's what we want. The mergejoinability test also eliminates clauses - * containing volatile functions, which we couldn't depend on. + * Check for pushed-down clauses referencing the inner rel. If there is + * such a clause then join removal has to be disallowed. We have to + * check this despite the previous attr_needed checks because of the + * possibility of pushed-down clauses referencing the rel. */ foreach(l, innerrel->joininfo) { RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l); - - /* - * If it's not a join clause for this outer join, we can't use it. - * Note that if the clause is pushed-down, then it is logically from - * above the outer join, even if it references no other rels (it might - * be from WHERE, for example). - */ - if (RINFO_IS_PUSHED_DOWN(restrictinfo, joinrelids)) - { - /* - * If such a clause actually references the inner rel then join - * removal has to be disallowed. We have to check this despite - * the previous attr_needed checks because of the possibility of - * pushed-down clauses referencing the rel. - */ - if (bms_is_member(innerrelid, restrictinfo->clause_relids)) + if (RINFO_IS_PUSHED_DOWN(restrictinfo, joinrelids) + && bms_is_member(innerrel->relid, restrictinfo->clause_relids)) return false; - continue; /* else, ignore; not useful here */ - } - - /* Ignore if it's not a mergejoinable clause */ - if (!restrictinfo->can_join || - restrictinfo->mergeopfamilies == NIL) - continue; /* not mergejoinable */ - - /* - * Check if clause has the form "outer op inner" or "inner op outer", - * and if so mark which side is inner. - */ - if (!clause_sides_match_join(restrictinfo, sjinfo->min_lefthand, - innerrel->relids)) - continue; /* no good for these input relations */ - - /* OK, add to list */ - clause_list = lappend(clause_list, restrictinfo); } - /* - * Now that we have the relevant equality join clauses, try to prove the - * innerrel distinct. - */ - if (rel_is_distinct_for(root, innerrel, clause_list)) - return true; - - /* - * Some day it would be nice to check for other methods of establishing - * distinctness. - */ - return false; + return is_innerrel_unique_for(root, joinrelids, sjinfo->min_lefthand, + innerrel, sjinfo->jointype, innerrel->joininfo, + NULL /*unique_index*/); } - /* * Remove the target relid from the planner's data structures, having * determined that there is no need to include it in the query. @@ -561,7 +521,7 @@ reduce_unique_semijoins(PlannerInfo *root) /* Test whether the innerrel is unique for those clauses. */ if (!innerrel_is_unique(root, joinrelids, sjinfo->min_lefthand, innerrel, - JOIN_SEMI, restrictlist, true)) + JOIN_SEMI, restrictlist, true, NULL /*index_info*/)) continue; /* OK, remove the SpecialJoinInfo from the list. */ @@ -636,10 +596,17 @@ rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel) * Note that the passed-in clause_list may be destructively modified! This * is OK for current uses, because the clause_list is built by the caller for * the sole purpose of passing to this function. + * + * If unique_index is not null, it is set to point to the index that guarantees + * uniqueness for a base relation. */ static bool -rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list) +rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list, + UniqueRelInfo **unique_info) { + if (unique_info) + *unique_info = NULL; + /* * We could skip a couple of tests here if we assume all callers checked * rel_supports_distinctness first, but it doesn't seem worth taking any @@ -654,8 +621,8 @@ rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list) * relation_has_unique_index_for automatically adds any usable * restriction clauses for the rel, so we needn't do that here. */ - if (relation_has_unique_index_for(root, rel, clause_list, NIL, NIL)) - return true; + return relation_has_unique_index_for(root, rel, clause_list, NIL, NIL, + unique_info); } else if (rel->rtekind == RTE_SUBQUERY) { @@ -959,6 +926,10 @@ distinct_col_search(int colno, List *colnos, List *opids) * heuristic about whether to cache negative answers; it should be "true" * if making an inquiry that is not part of the normal bottom-up join search * sequence. + * + * If index_info_out is not null, it is set to point to a new UniqueRelInfo + * allocated in root memory context, that describes the index that guarantees + * uniqueness. */ bool innerrel_is_unique(PlannerInfo *root, @@ -967,12 +938,23 @@ innerrel_is_unique(PlannerInfo *root, RelOptInfo *innerrel, JoinType jointype, List *restrictlist, - bool force_cache) + bool force_cache, + UniqueRelInfo **unique_info_out) { MemoryContext old_context; ListCell *lc; + UniqueRelInfo *unique_info; + + if (unique_info_out) + *unique_info_out = NULL; - /* Certainly can't prove uniqueness when there are no joinclauses */ + /* + * It is possible to prove uniqueness even in the absence of joinclauses, + * just from baserestrictinfos alone. However, in these cases the inner + * relation returns one row at most, so join removal won't give much + * benefit. It seems better to save some planning time by ignoring these + * cases. + */ if (restrictlist == NIL) return false; @@ -992,10 +974,14 @@ innerrel_is_unique(PlannerInfo *root, */ foreach(lc, innerrel->unique_for_rels) { - Relids unique_for_rels = (Relids) lfirst(lc); + unique_info = (UniqueRelInfo *) lfirst(lc); - if (bms_is_subset(unique_for_rels, outerrelids)) + if (bms_is_subset(unique_info->outerrelids, outerrelids)) + { + if (unique_info_out) + *unique_info_out = unique_info; return true; /* Success! */ + } } /* @@ -1012,7 +998,7 @@ innerrel_is_unique(PlannerInfo *root, /* No cached information, so try to make the proof. */ if (is_innerrel_unique_for(root, joinrelids, outerrelids, innerrel, - jointype, restrictlist)) + jointype, restrictlist, &unique_info)) { /* * Cache the positive result for future probes, being sure to keep it @@ -1025,10 +1011,15 @@ innerrel_is_unique(PlannerInfo *root, * supersets of them anyway. */ old_context = MemoryContextSwitchTo(root->planner_cxt); - innerrel->unique_for_rels = lappend(innerrel->unique_for_rels, - bms_copy(outerrelids)); + if (!unique_info) + unique_info = makeNode(UniqueRelInfo); + unique_info->outerrelids = bms_copy(outerrelids); + innerrel->unique_for_rels = lappend(innerrel->unique_for_rels, unique_info); MemoryContextSwitchTo(old_context); + if (unique_info_out) + *unique_info_out = unique_info; + return true; /* Success! */ } else @@ -1074,11 +1065,15 @@ is_innerrel_unique_for(PlannerInfo *root, Relids outerrelids, RelOptInfo *innerrel, JoinType jointype, - List *restrictlist) + List *restrictlist, + UniqueRelInfo **unique_info) { List *clause_list = NIL; ListCell *lc; + if (unique_info) + *unique_info = NULL; + /* * Search for mergejoinable clauses that constrain the inner rel against * the outer rel. If an operator is mergejoinable then it behaves like @@ -1116,5 +1111,1006 @@ is_innerrel_unique_for(PlannerInfo *root, } /* Let rel_is_distinct_for() do the hard work */ - return rel_is_distinct_for(root, innerrel, clause_list); + return rel_is_distinct_for(root, innerrel, clause_list, unique_info); +} + +typedef struct +{ + Index oldRelid; + Index newRelid; +} ChangeVarnoContext; + + +static bool +change_varno_walker(Node *node, ChangeVarnoContext *context) +{ + if (node == NULL) + return false; + + if (IsA(node, Var)) + { + Var* var = (Var*)node; + if (var->varno == context->oldRelid) + { + var->varno = context->newRelid; + var->varnoold = context->newRelid; + } + return false; + } + if (IsA(node, RestrictInfo)) + { + change_rinfo((RestrictInfo*)node, context->oldRelid, context->newRelid); + return false; + } + return expression_tree_walker(node, change_varno_walker, context); +} + +/* + * For all Vars in the expression that have varno = oldRelid, set + * varno = newRelid. + */ +static void +change_varno(Expr *expr, Index oldRelid, Index newRelid) +{ + ChangeVarnoContext context; + + context.oldRelid = oldRelid; + context.newRelid = newRelid; + change_varno_walker((Node *) expr, &context); +} + +/* + * Substitute newId for oldId in relids. + */ +static void +change_relid(Relids *relids, Index oldId, Index newId) +{ + if (bms_is_member(oldId, *relids)) + *relids = bms_add_member(bms_del_member(bms_copy(*relids), oldId), newId); +} + +static void +change_rinfo(RestrictInfo* rinfo, Index from, Index to) +{ + bool is_req_equal = + (rinfo->required_relids == rinfo->clause_relids) ? true : false; + + change_varno(rinfo->clause, from, to); + change_varno(rinfo->orclause, from, to); + change_relid(&rinfo->clause_relids, from, to); + if (is_req_equal) + rinfo->required_relids = rinfo->clause_relids; + else + change_relid(&rinfo->required_relids, from, to); + change_relid(&rinfo->left_relids, from, to); + change_relid(&rinfo->right_relids, from, to); + change_relid(&rinfo->outer_relids, from, to); + change_relid(&rinfo->nullable_relids, from, to); +} + +/* + * Update EC members to point to the remaining relation instead of the removed + * one, removing duplicates. + */ +static void +update_ec_members(EquivalenceClass *ec, Index toRemove, Index toKeep) +{ + int counter; + + for (counter = 0; counter < list_length(ec->ec_members); ) + { + ListCell *cell = list_nth_cell(ec->ec_members, counter); + EquivalenceMember *em = lfirst(cell); + int counter1; + + if (!bms_is_member(toRemove, em->em_relids)) + { + counter++; + continue; + } + + change_relid(&em->em_relids, toRemove, toKeep); + /* We only process inner joins */ +// Assert(bms_is_empty(em->em_nullable_relids)); + change_varno(em->em_expr, toRemove, toKeep); + + /* + * After we switched the equivalence member to the remaining relation, + * check that it is not the same as the existing member, and if it + * is, delete it. + */ + for (counter1 = 0; counter1 < list_length(ec->ec_members); counter1++) + { + EquivalenceMember *other; + + if (counter1 == counter) + continue; + + other = castNode(EquivalenceMember, list_nth(ec->ec_members, counter1)); + + if (equal(other->em_expr, em->em_expr)) + break; + } + + if (counter1 < list_length(ec->ec_members)) + ec->ec_members = list_delete_cell(ec->ec_members, cell); + else + counter++; + } +} + +/* + * Update EC sources to point to the remaining relation instead of the + * removed one. + */ +static void +update_ec_sources(List **sources, Index toRemove, Index toKeep) +{ + int counter; + int cc=0; + + for (counter = 0; counter < list_length(*sources); ) + { + ListCell *cell = list_nth_cell(*sources, counter); + RestrictInfo *rinfo = castNode(RestrictInfo, lfirst(cell)); + int counter1; + + if (!bms_is_member(toRemove, rinfo->required_relids)) + { + counter++; + continue; + } + + change_varno(rinfo->clause, toRemove, toKeep); + + /* + * After switching the clause to the remaining relation, check it for + * redundancy with existing ones. We don't have to check for + * redundancy with derived clauses, because we've just deleted them. + */ + for (counter1 = 0; counter1 < list_length(*sources); counter1++) + { + RestrictInfo *other; + + if (counter1 == counter) + continue; + + other = castNode(RestrictInfo, list_nth(*sources, counter1)); + if (equal(rinfo->clause, other->clause)) + break; + } + + if (counter1 < list_length(*sources)) + { + *sources = list_delete_cell(*sources, cell); + cc++; + } + else + { + counter++; + + /* We will keep this RestrictInfo, correct its relids. */ + change_rinfo(rinfo, toRemove, toKeep); + } + } +} + +/* + * Scratch space for the unique self join removal code. + */ +typedef struct +{ + PlannerInfo *root; + + /* Temporary array for relation ids. */ + Index *relids; + + /* + * Array of Relids, one for each relation, indexed by relation id. Each + * element is a set of relation ids with which this relation has a special + * join. + */ + Relids *special_join_rels; + + /* Array of row marks indexed by relid. */ + PlanRowMark **row_marks; + + /* Bitmapset for join relids that is used to avoid reallocation. */ + Relids joinrelids; +} UsjScratch; + + + +/* + * Remove a relation after we have proven that it participates only in an + * unneeded unique self join. + * + * The joinclauses list is destructively changed. + */ +static void +remove_self_join_rel(UsjScratch *scratch, Relids joinrelids, List *joinclauses, + RelOptInfo *toKeep, RelOptInfo *toRemove) +{ + PlannerInfo *root = scratch->root; + ListCell *cell; + + /* + * Include all eclass mentions of removed relation into the eclass mentions + * of kept relation. + */ + toKeep->eclass_indexes = bms_add_members(toRemove->eclass_indexes, + toKeep->eclass_indexes); + + /* + * Transfer join and restriction clauses from the removed relation to the + * remaining one. We change the Vars of the clause to point to the + * remaining relation instead of the removed one. The clauses that require + * a subset of joinrelids become restriction clauses of the remaining + * relation, and others remain join clauses. We append them to + * baserestrictinfo and joininfo respectively, trying not to introduce + * duplicates. + * + * We also have to process the 'joinclauses' list here, because it + * contains EC-derived join clauses which must become filter clauses. It + * is not enough to just correct the ECs, because the EC-derived + * restrictions are generated before join removal (see + * generate_base_implied_equalities). + */ + + /* Replace removed relation in joininfo */ + foreach(cell, toKeep->joininfo) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, cell); + + change_rinfo(rinfo, toRemove->relid, toKeep->relid); + + /* + * If this clause is a mergejoinable equality clause that compares a + * variable to itself, i.e., has the form of "X=X", replace it with + * null test. + */ + if (rinfo->mergeopfamilies && IsA(rinfo->clause, OpExpr)) + { + Expr *leftOp, *rightOp; + + leftOp = (Expr *) get_leftop(rinfo->clause); + rightOp = (Expr *) get_rightop(rinfo->clause); + + if (leftOp != NULL && equal(leftOp, rightOp)) + { + NullTest *nullTest = makeNode(NullTest); + nullTest->arg = leftOp; + nullTest->nulltesttype = IS_NOT_NULL; + nullTest->argisrow = false; + nullTest->location = -1; + rinfo->clause = (Expr*)nullTest; + } + } + } + + foreach(cell, joinclauses) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, cell); + + change_rinfo(rinfo, toRemove->relid, toKeep->relid); + + /* + * If this clause is a mergejoinable equality clause that compares a + * variable to itself, i.e., has the form of "X=X", replace it with + * null test. + */ + if (rinfo->mergeopfamilies && IsA(rinfo->clause, OpExpr)) + { + Expr *leftOp, *rightOp; + + leftOp = (Expr *) get_leftop(rinfo->clause); + rightOp = (Expr *) get_rightop(rinfo->clause); + + if (leftOp != NULL && equal(leftOp, rightOp)) + { + NullTest *nullTest = makeNode(NullTest); + nullTest->arg = leftOp; + nullTest->nulltesttype = IS_NOT_NULL; + nullTest->argisrow = false; + nullTest->location = -1; + rinfo->clause = (Expr*)nullTest; + toKeep->baserestrictinfo = lappend(toKeep->baserestrictinfo, rinfo); + } + } + } + + /* Tranfer removed relations clauses to kept relation */ + foreach(cell, toRemove->baserestrictinfo) + { + ListCell *otherCell; + RestrictInfo *rinfo = lfirst_node(RestrictInfo, cell); + List **target = &toKeep->baserestrictinfo; + + /* + * Replace the references to the removed relation with references to + * the remaining one. We won't necessarily add this clause, because + * it may be already present in the joininfo or baserestrictinfo. + * Still, we have to switch it to point to the remaining relation. + * This is important for join clauses that reference both relations, + * because they are included in both joininfos. + */ + change_rinfo(rinfo, toRemove->relid, toKeep->relid); + + /* + * Don't add the clause if it is already present in the list, or + * derived from the same equivalence class, or is the same as another + * clause. + */ + foreach (otherCell, *target) + { + RestrictInfo *other = lfirst(otherCell); + if (other == rinfo + || (rinfo->parent_ec != NULL + && other->parent_ec == rinfo->parent_ec) + || equal(rinfo->clause, other->clause)) + { + break; + } + } + + /* We can't have an EC-derived clause that joins to some third relation */ + if (otherCell != NULL) + continue; + + *target = lappend(*target, rinfo); + } + + /* Tranfer removed relations clauses to kept relation */ + foreach(cell, toRemove->joininfo) + { + ListCell *otherCell; + RestrictInfo *rinfo = lfirst_node(RestrictInfo, cell); + List **target = &toKeep->joininfo; + + /* + * Replace the references to the removed relation with references to + * the remaining one. We won't necessarily add this clause, because + * it may be already present in the joininfo or baserestrictinfo. + * Still, we have to switch it to point to the remaining relation. + * This is important for join clauses that reference both relations, + * because they are included in both joininfos. + */ + change_rinfo(rinfo, toRemove->relid, toKeep->relid); + + /* + * Don't add the clause if it is already present in the list, or + * derived from the same equivalence class, or is the same as another + * clause. + */ + foreach (otherCell, *target) + { + RestrictInfo *other = lfirst(otherCell); + if (other == rinfo + || (rinfo->parent_ec != NULL + && other->parent_ec == rinfo->parent_ec) + || equal(rinfo->clause, other->clause)) + { + break; + } + } + + if (otherCell != NULL) + continue; + + /* + * If this clause is a mergejoinable equality clause that compares a + * variable to itself, i.e., has the form of "X=X", replace it with + * null test. + */ + if (rinfo->mergeopfamilies && IsA(rinfo->clause, OpExpr)) + { + Expr *leftOp, *rightOp; + + Assert(IsA(rinfo->clause, OpExpr)); + + leftOp = (Expr *) get_leftop(rinfo->clause); + rightOp = (Expr *) get_rightop(rinfo->clause); + + if (leftOp != NULL && equal(leftOp, rightOp)) + { + NullTest *nullTest = makeNode(NullTest); + nullTest->arg = leftOp; + nullTest->nulltesttype = IS_NOT_NULL; + nullTest->argisrow = false; + nullTest->location = -1; + rinfo->clause = (Expr*)nullTest; + } + } + + *target = lappend(*target, rinfo); + } + + /* + * Transfer the targetlist and attr_needed flags. + */ + Assert(toRemove->reltarget->sortgrouprefs == 0); + + foreach (cell, toRemove->reltarget->exprs) + { + Expr *node = lfirst(cell); + change_varno(node, toRemove->relid, toKeep->relid); + if (!list_member(toKeep->reltarget->exprs, node)) + toKeep->reltarget->exprs = lappend(toKeep->reltarget->exprs, node); + } + + for (int i = toKeep->min_attr; i <= toKeep->max_attr; i++) + { + int attno = i - toKeep->min_attr; + toKeep->attr_needed[attno] = bms_add_members(toKeep->attr_needed[attno], + toRemove->attr_needed[attno]); + } + + /* + * If the removed relation has a row mark, transfer it to the remaining + * one. + * + * If both rels have row marks, just keep the one corresponding to the + * remaining relation, because we verified earlier that they have the same + * strength. + * + * Also make sure that the scratch->row_marks cache is up to date, because + * we are going to use it for further join removals. + */ + if (scratch->row_marks[toRemove->relid]) + { + PlanRowMark **markToRemove = &scratch->row_marks[toRemove->relid]; + PlanRowMark **markToKeep = &scratch->row_marks[toKeep->relid]; + + if (*markToKeep) + { + Assert((*markToKeep)->markType == (*markToRemove)->markType); + + root->rowMarks = list_delete_ptr(root->rowMarks, *markToKeep); + *markToKeep = NULL; + } + else + { + *markToKeep = *markToRemove; + *markToRemove = NULL; + + /* Shouldn't have inheritance children here. */ + Assert((*markToKeep)->rti == (*markToKeep)->prti); + + (*markToKeep)->rti = toKeep->relid; + (*markToKeep)->prti = toKeep->relid; + } + } + + /* + * Likewise replace references in SpecialJoinInfo data structures. + * + * This is relevant in case the join we're deleting is nested inside some + * special joins: the upper joins' relid sets have to be adjusted. + */ + foreach (cell, root->join_info_list) + { + SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(cell); + + change_relid(&sjinfo->min_lefthand, toRemove->relid, toKeep->relid); + change_relid(&sjinfo->min_righthand, toRemove->relid, toKeep->relid); + change_relid(&sjinfo->syn_lefthand, toRemove->relid, toKeep->relid); + change_relid(&sjinfo->syn_righthand, toRemove->relid, toKeep->relid); + change_varno((Expr*)sjinfo->semi_rhs_exprs, toRemove->relid, toKeep->relid); + } + + /* + * Likewise update references in PlaceHolderVar data structures. + */ + foreach(cell, root->placeholder_list) + { + PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(cell); + + /* + * We are at an inner join of two base relations. A placeholder can't + * be needed here or evaluated here. + */ + Assert(!bms_is_subset(phinfo->ph_eval_at, joinrelids)); + Assert(!bms_is_subset(phinfo->ph_needed, joinrelids)); + + change_relid(&phinfo->ph_eval_at, toRemove->relid, toKeep->relid); + change_relid(&phinfo->ph_needed, toRemove->relid, toKeep->relid); + change_relid(&phinfo->ph_lateral, toRemove->relid, toKeep->relid); + change_relid(&phinfo->ph_var->phrels, toRemove->relid, toKeep->relid); + } + + /* + * Update the equivalence classes that reference the removed relations. + */ + foreach(cell, root->eq_classes) + { + EquivalenceClass *ec = lfirst(cell); + + if (!bms_is_member(toRemove->relid, ec->ec_relids)) + { + /* + * This EC doesn't reference the removed relation, nothing to be + * done for it. + */ + continue; + } + + /* + * Update the EC members to reference the remaining relation instead + * of the removed one. + */ + update_ec_members(ec, toRemove->relid, toKeep->relid); + change_relid(&ec->ec_relids, toRemove->relid, toKeep->relid); + + /* + * We will now update source and derived clauses of the EC. + * + * Restriction clauses for base relations are already distributed to + * the respective baserestrictinfo lists (see + * generate_implied_equalities). The above code has already processed + * this list, and updated these clauses to reference the remaining + * relation, so we can skip them here based on their relids. + * + * Likewise, we have already processed the join clauses that join the + * removed relation to the remaining one. + * + * Finally, there are join clauses that join the removed relation to + * some third relation. We can't just delete the source clauses and + * regenerate them from the EC, because the corresponding equality + * operators might be missing (see the handling of ec_broken). + * Therefore, we will update the references in the source clauses. + * + * Derived clauses can be generated again, so it is simpler to just + * delete them. + */ + list_free(ec->ec_derives); + ec->ec_derives = NULL; + update_ec_sources(&ec->ec_sources, toRemove->relid, toKeep->relid); + } + + /* + * Mark the rel as "dead" to show it is no longer part of the join tree. + * (Removing it from the baserel array altogether seems too risky.) + */ + toRemove->reloptkind = RELOPT_DEADREL; + + /* + * Update references to the removed relation from other baserels. + */ + for (int i = 1; i < root->simple_rel_array_size; i++) + { + RelOptInfo *otherrel = root->simple_rel_array[i]; + int attroff; + + /* no point in processing target rel itself */ + if (i == toRemove->relid) + continue; + + /* there may be empty slots corresponding to non-baserel RTEs */ + if (otherrel == NULL) + continue; + + Assert(otherrel->relid == i); /* sanity check on array */ + + /* Update attr_needed arrays. */ + for (attroff = otherrel->max_attr - otherrel->min_attr; + attroff >= 0; attroff--) + { + change_relid(&otherrel->attr_needed[attroff], toRemove->relid, + toKeep->relid); + } + + /* Update lateral references. */ + change_varno((Expr*)otherrel->lateral_vars, toRemove->relid, toKeep->relid); + } + + /* + * Change varno in some special cases with non-trivial RangeTblEntry + */ + foreach(cell, root->parse->rtable) + { + RangeTblEntry *rte = lfirst(cell); + + switch(rte->rtekind) + { + case RTE_FUNCTION: + change_varno((Expr*)rte->functions, toRemove->relid, toKeep->relid); + break; + case RTE_TABLEFUNC: + change_varno((Expr*)rte->tablefunc, toRemove->relid, toKeep->relid); + break; + case RTE_VALUES: + change_varno((Expr*)rte->values_lists, toRemove->relid, toKeep->relid); + break; + default: + /* no op */ + break; + } + } +} + +/* + * Test whether the relations are joined on the same unique column. + */ +static bool +is_unique_self_join(PlannerInfo *root, Relids joinrelids, RelOptInfo *outer, + RelOptInfo *inner, List *restrictlist) +{ + UniqueRelInfo *outerinfo = NULL; + UniqueRelInfo *innerinfo = NULL; + List *outerValues, *innerValues; + + innerrel_is_unique(root, joinrelids, inner->relids, + outer, JOIN_INNER, list_concat(list_concat_unique(list_copy(outer->joininfo), restrictlist), + outer->baserestrictinfo), true, &outerinfo); + if (!outerinfo || !outerinfo->index) + return false; + + innerrel_is_unique(root, joinrelids, outer->relids, + inner, JOIN_INNER, list_concat(list_concat_unique(list_copy(inner->joininfo), restrictlist), + inner->baserestrictinfo), true, &innerinfo); + if (!innerinfo || !innerinfo->index) + return false; + + /* We must have the same unique index for both relations. */ + if (outerinfo->index->indexoid != innerinfo->index->indexoid) + return false; + + if (restrictlist != NULL && IsA(((RestrictInfo*)linitial(restrictlist))->clause, Const)) + return false; + + /* + * We have proven that for both relations, the same unique index guarantees + * that there is at most one row where columns equal given values. These + * values must be the same for both relations, or else we won't match the + * same row on each side of join. A value may be either Const or Var of some + * other relation. For the purposes of this proof, the Vars of the inner and + * outer relation are the same, so we replace outer varno with inner and + * compare the column values using equal(). + */ + innerValues = copyObject(innerinfo->column_values); + outerValues = copyObject(outerinfo->column_values); + change_varno((Expr *) innerValues, outer->relid, inner->relid); + change_varno((Expr *) outerValues, outer->relid, inner->relid); + if (!equal(outerValues, innerValues)) + { + list_free_deep(outerValues); + list_free_deep(innerValues); + return false; + } + list_free_deep(outerValues); + list_free_deep(innerValues); + + return true; +} + +/* + * Find and remove unique self joins in a group of base relations that have + * the same Oid. + * + * Returns a set of relids that were removed. + */ +static Relids +remove_self_joins_one_group(UsjScratch *scratch, Index *relids, int n) +{ + PlannerInfo *root = scratch->root; + Relids joinrelids = scratch->joinrelids; + Relids result = NULL; + int i, o; + + if (n < 2) + return NULL; + + for (o = 0; o < n; o++) + { + RelOptInfo *outer = root->simple_rel_array[relids[o]]; + + for (i = o + 1; i < n; i++) + { + RelOptInfo *inner = root->simple_rel_array[relids[i]]; + List *restrictlist; + ListCell *cell; + + /* A sanity check: the relations have the same Oid. */ + Assert(root->simple_rte_array[relids[i]]->relid + == root->simple_rte_array[relids[o]]->relid); + + /* + * This optimization applies to inner joins only, so skip any + * relations that form a special join. + */ + if (bms_is_member(relids[i], scratch->special_join_rels[relids[o]])) + continue; + + /* Reuse joinrelids bitset to avoid reallocation. */ + joinrelids = bms_del_members(joinrelids, joinrelids); + + /* + * We only deal with base rels here, so their relids bitset + * contains only one member -- their relid. + */ + joinrelids = bms_add_member(joinrelids, relids[o]); + joinrelids = bms_add_member(joinrelids, relids[i]); + + /* Is it a unique self join? */ + restrictlist = build_joinrel_restrictlist(root, joinrelids, outer, + inner); + if (!is_unique_self_join(root, joinrelids, outer, inner, + restrictlist)) + continue; + + /* + * We can't remove the join if the relations have row marks of + * different strength (e.g. one is locked FOR UPDATE and another + * just has ROW_MARK_REFERENCE for EvalPlanQual rechecking). + */ + if (scratch->row_marks[relids[i]] && scratch->row_marks[relids[o]] + && scratch->row_marks[relids[i]]->markType + != scratch->row_marks[relids[o]]->markType) + { + continue; + } + + /* + * Be safe to do not remove table participated in complicated PH + */ + foreach(cell, root->placeholder_list) + { + PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(cell); + + /* there isn't any other place to eval PHV */ + if (bms_is_subset(phinfo->ph_eval_at, joinrelids) || + bms_is_subset(phinfo->ph_needed, joinrelids)) + break; + } + + if (cell) + continue; + + /* + * We can remove either relation, so remove the outer one, to + * simplify this loop. + */ + remove_self_join_rel(scratch, joinrelids, restrictlist, inner, outer); + result = bms_add_member(result, relids[o]); + + /* + * Replace varno in root targetlist and HAVING clause. + */ + change_varno((Expr *) root->processed_tlist, relids[o], relids[i]); + change_varno((Expr *) root->parse->havingQual, relids[o], relids[i]); + + /* We removed the outer relation, try the next one. */ + break; + } + } + + scratch->joinrelids = joinrelids; + return result; +} + +/* + * A qsort comparator to sort the relids by the relation Oid. + */ +static int +compare_rte(const Index *left, const Index *right, PlannerInfo *root) +{ + Oid l = root->simple_rte_array[*left]->relid; + Oid r = root->simple_rte_array[*right]->relid; + + return l < r ? 1 : (l == r ? 0 : -1); +} + +/* + * Find and remove unique self joins on a particular level of the join tree. + * + * We sort the relations by Oid and then examine each group with the same Oid. + * If we removed any relation, remove it from joinlist as well. + */ +static void +remove_self_joins_one_level(UsjScratch *scratch, List **joinlist) +{ + ListCell *cell; + int counter; + Relids relidsToRemove = NULL; + Oid groupOid; + int groupStart; + int i; + int n = 0; + Index *relid_ascending = scratch->relids; + PlannerInfo *root = scratch->root; + + /* + * Collect the ids of base relations at this level of the join tree. + */ + foreach (cell, *joinlist) + { + RangeTblEntry *rte; + RelOptInfo *rel; + RangeTblRef *ref = (RangeTblRef *) lfirst(cell); + if (!IsA(ref, RangeTblRef)) + continue; + + rte = root->simple_rte_array[ref->rtindex]; + rel = root->simple_rel_array[ref->rtindex]; + + /* We only care about base relations from which we select something. */ + if (rte->rtekind != RTE_RELATION || rte->relkind != RELKIND_RELATION + || rel == NULL) + { + continue; + } + + /* + * This optimization won't work for tables that have inheritance + * children. + */ + if (rte->inh) + continue; + + relid_ascending[n++] = ref->rtindex; + + /* + * Limit the number of joins we process to control the quadratic + * behavior. + */ + if (n > join_collapse_limit) + break; + } + + if (n < 2) + return; + + /* + * Find and process the groups of relations that have same Oid. + */ + qsort_arg(relid_ascending, n, sizeof(*relid_ascending), + (qsort_arg_comparator) compare_rte, root); + groupOid = root->simple_rte_array[relid_ascending[0]]->relid; + groupStart = 0; + for (i = 1; i < n; i++) + { + RangeTblEntry *rte = root->simple_rte_array[relid_ascending[i]]; + Assert(rte->relid != InvalidOid); + if (rte->relid != groupOid) + { + relidsToRemove = bms_add_members(relidsToRemove, + remove_self_joins_one_group(scratch, &relid_ascending[groupStart], + i - groupStart)); + groupOid = rte->relid; + groupStart = i; + } + } + Assert(groupOid != InvalidOid); + Assert(groupStart < n); + relidsToRemove = bms_add_members(relidsToRemove, + remove_self_joins_one_group(scratch, &relid_ascending[groupStart], + n - groupStart)); + + /* Delete the removed relations from joinlist. */ + for (counter = 0; counter < list_length(*joinlist); ) + { + Node *node; + + cell = list_nth_cell(*joinlist, counter); + node = (Node *) lfirst(cell); + + if (IsA(node, RangeTblRef) + && bms_is_member(((RangeTblRef *) node)->rtindex, relidsToRemove)) + { + *joinlist = list_delete_cell(*joinlist, cell); + } + else + counter++; + } +} + +/* + * Find and remove unique self joins on a single level of a join tree, and + * recurse to handle deeper levels. + */ +static void +remove_self_joins_recurse(UsjScratch *scratch, List **joinlist) +{ + ListCell *lc; + foreach (lc, *joinlist) + { + switch (((Node *) lfirst(lc))->type) + { + case T_List: + remove_self_joins_recurse(scratch, (List **) &lfirst(lc)); + break; + case T_RangeTblRef: + break; + default: + Assert(false); + } + } + remove_self_joins_one_level(scratch, joinlist); +} + +/* + * Find and remove useless self joins. + * + * We search for joins where the same relation is joined to itself on all + * columns of some unique index. If this condition holds, then, for + * each outer row, only one inner row matches, and it is the same row + * of the same relation. This allows us to remove the join and replace + * it with a scan that combines WHERE clauses from both sides. The join + * clauses themselves assume the form of X = X and can be replaced with + * NOT NULL clauses. + * + * For the sake of simplicity, we don't apply this optimization to special + * joins. Here is a list of what we could do in some particular cases: + * 'a a1 semi join a a2': is reduced to inner by reduce_unique_semijoins, + * and then removed normally. + * 'a a1 anti join a a2': could simplify to a scan with 'outer quals AND + * (IS NULL on join columns OR NOT inner quals)'. + * 'a a1 left join a a2': could simplify to a scan like inner, but without + * NOT NULL condtions on join columns. + * 'a a1 left join (a a2 join b)': can't simplify this, because join to b + * can both remove rows and introduce duplicates. + * + * To search for removable joins, we order all the relations on their Oid, + * go over each set with the same Oid, and consider each pair of relations + * in this set. We check that both relation are made unique by the same + * unique index with the same clauses. + * + * To remove the join, we mark one of the participating relation as + * dead, and rewrite all references to it to point to the remaining + * relation. This includes modifying RestrictInfos, EquivalenceClasses and + * EquivalenceMembers. We also have to modify the row marks. The join clauses + * of the removed relation become either restriction or join clauses, based on + * whether they reference any relations not participating in the removed join. + * + * 'targetlist' is the top-level targetlist of query. If it has any references + * to the removed relations, we update them to point to the remaining ones. + */ +void +remove_useless_self_joins(PlannerInfo *root, List **joinlist) +{ + ListCell *lc; + UsjScratch scratch; + + scratch.root = root; + scratch.relids = palloc(root->simple_rel_array_size * sizeof(Index)); + scratch.special_join_rels = palloc0(root->simple_rel_array_size * sizeof(Relids)); + scratch.row_marks = palloc0(root->simple_rel_array_size * sizeof(PlanRowMark *)); + scratch.joinrelids = NULL; + + /* Find out which relations have special joins to which. */ + foreach(lc, root->join_info_list) + { + SpecialJoinInfo *info = (SpecialJoinInfo *) lfirst(lc); + int bit = -1; + while ((bit = bms_next_member(info->syn_lefthand, bit)) >= 0) + { + RelOptInfo *rel = find_base_rel(root, bit); + scratch.special_join_rels[rel->relid] = + bms_add_members(scratch.special_join_rels[rel->relid], + info->syn_righthand); + } + + bit = -1; + while ((bit = bms_next_member(info->syn_righthand, bit)) >= 0) + { + RelOptInfo *rel = find_base_rel(root, bit); + scratch.special_join_rels[rel->relid] = + bms_add_members(scratch.special_join_rels[rel->relid], + info->syn_lefthand); + } + } + + /* Collect row marks. */ + foreach (lc, root->rowMarks) + { + PlanRowMark *rowMark = (PlanRowMark *) lfirst(lc); + + /* Can't have more than one row mark for a relation. */ + Assert(scratch.row_marks[rowMark->rti] == NULL); + + scratch.row_marks[rowMark->rti] = rowMark; + } + + /* Finally, remove the joins. */ + remove_self_joins_recurse(&scratch, joinlist); } diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c index f0c1b52..8413241 100644 --- a/src/backend/optimizer/plan/planmain.c +++ b/src/backend/optimizer/plan/planmain.c @@ -218,7 +218,7 @@ query_planner(PlannerInfo *root, * jointree preprocessing, but the necessary information isn't available * until we've built baserel data structures and classified qual clauses. */ - joinlist = remove_useless_joins(root, joinlist); + joinlist = remove_useless_left_joins(root, joinlist); /* * Also, reduce any semijoins with unique inner rels to plain inner joins. @@ -227,6 +227,11 @@ query_planner(PlannerInfo *root, reduce_unique_semijoins(root); /* + * Remove self joins on a unique column. + */ + remove_useless_self_joins(root, &joinlist); + + /* * Now distribute "placeholders" to base rels as needed. This has to be * done after join removal because removal could change whether a * placeholder is evaluable at a base rel. diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 34acb73..1466ec0 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -1598,7 +1598,8 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, if (rel->rtekind == RTE_RELATION && sjinfo->semi_can_btree && relation_has_unique_index_for(root, rel, NIL, sjinfo->semi_rhs_exprs, - sjinfo->semi_operators)) + sjinfo->semi_operators, + NULL /*index_info*/)) { pathnode->umethod = UNIQUE_PATH_NOOP; pathnode->path.rows = rel->rows; diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index 8541538..88eacd2 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -39,14 +39,10 @@ typedef struct JoinHashEntry static void build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *input_rel); -static List *build_joinrel_restrictlist(PlannerInfo *root, - RelOptInfo *joinrel, - RelOptInfo *outer_rel, - RelOptInfo *inner_rel); static void build_joinrel_joinlist(RelOptInfo *joinrel, RelOptInfo *outer_rel, RelOptInfo *inner_rel); -static List *subbuild_joinrel_restrictlist(RelOptInfo *joinrel, +static List *subbuild_joinrel_restrictlist(Relids joinrelids, List *joininfo_list, List *new_restrictlist); static List *subbuild_joinrel_joinlist(RelOptInfo *joinrel, @@ -589,7 +585,7 @@ build_join_rel(PlannerInfo *root, */ if (restrictlist_ptr) *restrictlist_ptr = build_joinrel_restrictlist(root, - joinrel, + joinrel->relids, outer_rel, inner_rel); return joinrel; @@ -693,7 +689,7 @@ build_join_rel(PlannerInfo *root, * caller might or might not need the restrictlist, but I need it anyway * for set_joinrel_size_estimates().) */ - restrictlist = build_joinrel_restrictlist(root, joinrel, + restrictlist = build_joinrel_restrictlist(root, joinrel->relids, outer_rel, inner_rel); if (restrictlist_ptr) *restrictlist_ptr = restrictlist; @@ -1013,7 +1009,7 @@ build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel, * the various joinlist entries ultimately refer to RestrictInfos * pushed into them by distribute_restrictinfo_to_rels(). * - * 'joinrel' is a join relation node + * 'joinrelids' is a join relation id set * 'outer_rel' and 'inner_rel' are a pair of relations that can be joined * to form joinrel. * @@ -1026,9 +1022,9 @@ build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel, * RestrictInfo nodes are no longer context-dependent. Instead, just include * the original nodes in the lists made for the join relation. */ -static List * +List * build_joinrel_restrictlist(PlannerInfo *root, - RelOptInfo *joinrel, + Relids joinrelids, RelOptInfo *outer_rel, RelOptInfo *inner_rel) { @@ -1039,8 +1035,8 @@ build_joinrel_restrictlist(PlannerInfo *root, * eliminating any duplicates (important since we will see many of the * same clauses arriving from both input relations). */ - result = subbuild_joinrel_restrictlist(joinrel, outer_rel->joininfo, NIL); - result = subbuild_joinrel_restrictlist(joinrel, inner_rel->joininfo, result); + result = subbuild_joinrel_restrictlist(joinrelids, outer_rel->joininfo, NIL); + result = subbuild_joinrel_restrictlist(joinrelids, inner_rel->joininfo, result); /* * Add on any clauses derived from EquivalenceClasses. These cannot be @@ -1049,7 +1045,7 @@ build_joinrel_restrictlist(PlannerInfo *root, */ result = list_concat(result, generate_join_implied_equalities(root, - joinrel->relids, + joinrelids, outer_rel->relids, inner_rel)); @@ -1075,7 +1071,7 @@ build_joinrel_joinlist(RelOptInfo *joinrel, } static List * -subbuild_joinrel_restrictlist(RelOptInfo *joinrel, +subbuild_joinrel_restrictlist(Relids joinrelids, List *joininfo_list, List *new_restrictlist) { @@ -1085,7 +1081,7 @@ subbuild_joinrel_restrictlist(RelOptInfo *joinrel, { RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); - if (bms_is_subset(rinfo->required_relids, joinrel->relids)) + if (bms_is_subset(rinfo->required_relids, joinrelids)) { /* * This clause becomes a restriction clause for the joinrel, since diff --git a/src/backend/utils/mmgr/aset.c b/src/backend/utils/mmgr/aset.c index 6b63d6f..bfc0036 100644 --- a/src/backend/utils/mmgr/aset.c +++ b/src/backend/utils/mmgr/aset.c @@ -1427,7 +1427,6 @@ AllocSetCheck(MemoryContext context) chsize = chunk->size; /* aligned chunk size */ dsize = chunk->requested_size; /* real data */ - /* * Check chunk size */ diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index bce2d59..78b4f1e 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -272,6 +272,7 @@ typedef enum NodeTag T_RollupData, T_GroupingSetData, T_StatisticExtInfo, + T_UniqueRelInfo, /* * TAGS FOR MEMORY NODES (memnodes.h) diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index 23a06d7..f9bb078 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -534,8 +534,11 @@ typedef struct PartitionSchemeData *PartitionScheme; * populate these fields, for base rels; but someday they might be used for * join rels too: * - * unique_for_rels - list of Relid sets, each one being a set of other - * rels for which this one has been proven unique + * unique_for_rels - list of UniqueRelInfos, each of them recording + * a set of other rels for which this one has been proven + * unique. If this is a baserel that is made unique by an + * index, UniqueRelInfo also stores the information about + * that index. * non_unique_for_rels - list of Relid sets, each one being a set of * other rels for which we have tried and failed to prove * this one unique @@ -2502,4 +2505,19 @@ typedef struct JoinCostWorkspace double inner_rows_total; } JoinCostWorkspace; +/* + * UniqueRelInfo records a fact that a relation is unique when being joined + * to other relation(s) specified by outerrelids. If the uniqueness is + * guaranteed by a unique index, this index is also saved. The values that + * constrain index columns, be it Vars of outer relations or Consts, are saved + * to column_values list. + */ +typedef struct UniqueRelInfo +{ + NodeTag tag; + Relids outerrelids; + IndexOptInfo *index; + List *column_values; +} UniqueRelInfo; + #endif /* PATHNODES_H */ diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index a12af54..1c2d064 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -288,6 +288,10 @@ extern RelOptInfo *build_join_rel(PlannerInfo *root, RelOptInfo *inner_rel, SpecialJoinInfo *sjinfo, List **restrictlist_ptr); +extern List *build_joinrel_restrictlist(PlannerInfo *root, + Relids joinrelids, + RelOptInfo *outer_rel, + RelOptInfo *inner_rel); extern Relids min_join_parameterization(PlannerInfo *root, Relids joinrelids, RelOptInfo *outer_rel, diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 7345137..9814f00 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -72,7 +72,8 @@ extern void debug_print_rel(PlannerInfo *root, RelOptInfo *rel); extern void create_index_paths(PlannerInfo *root, RelOptInfo *rel); extern bool relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel, List *restrictlist, - List *exprlist, List *oprlist); + List *exprlist, List *oprlist, + UniqueRelInfo **info); extern bool indexcol_is_bool_constant_for_query(IndexOptInfo *index, int indexcol); extern bool match_index_to_operand(Node *operand, int indexcol, diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h index e7aaddd..785828a 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -16,6 +16,7 @@ #include "nodes/pathnodes.h" #include "nodes/plannodes.h" +#include "optimizer/paths.h" /* GUC parameters */ #define DEFAULT_CURSOR_TUPLE_FRACTION 0.1 @@ -96,13 +97,15 @@ extern void match_foreign_keys_to_quals(PlannerInfo *root); /* * prototypes for plan/analyzejoins.c */ -extern List *remove_useless_joins(PlannerInfo *root, List *joinlist); +extern List *remove_useless_left_joins(PlannerInfo *root, List *joinlist); extern void reduce_unique_semijoins(PlannerInfo *root); extern bool query_supports_distinctness(Query *query); extern bool query_is_distinct_for(Query *query, List *colnos, List *opids); extern bool innerrel_is_unique(PlannerInfo *root, Relids joinrelids, Relids outerrelids, RelOptInfo *innerrel, - JoinType jointype, List *restrictlist, bool force_cache); + JoinType jointype, List *restrictlist, bool force_cache, + UniqueRelInfo **unique_info); +extern void remove_useless_self_joins(PlannerInfo *root, List **jointree); /* * prototypes for plan/setrefs.c diff --git a/src/test/regress/expected/equivclass.out b/src/test/regress/expected/equivclass.out index c448d85..a57905f 100644 --- a/src/test/regress/expected/equivclass.out +++ b/src/test/regress/expected/equivclass.out @@ -439,3 +439,35 @@ explain (costs off) Filter: ((unique1 = unique1) OR (unique2 = unique2)) (2 rows) +-- Test that broken ECs are processed correctly during self join removal. +-- Disable merge joins so that we don't get an error about missing commutator. +-- Test both orientations of the join clause, because only one of them breaks +-- the EC. +set enable_mergejoin to off; +explain (costs off) + select * from ec0 m join ec0 n on m.ff = n.ff + join ec1 p on m.ff + n.ff = p.f1; + QUERY PLAN +---------------------------------------- + Nested Loop + Join Filter: ((n.ff + n.ff) = p.f1) + -> Seq Scan on ec1 p + -> Materialize + -> Seq Scan on ec0 n + Filter: (ff IS NOT NULL) +(6 rows) + +explain (costs off) + select * from ec0 m join ec0 n on m.ff = n.ff + join ec1 p on p.f1::int8 = (m.ff + n.ff)::int8alias1; + QUERY PLAN +--------------------------------------------------------------- + Nested Loop + Join Filter: ((p.f1)::bigint = ((n.ff + n.ff))::int8alias1) + -> Seq Scan on ec1 p + -> Materialize + -> Seq Scan on ec0 n + Filter: (ff IS NOT NULL) +(6 rows) + +reset enable_mergejoin; diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index b58d560..3080539 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -4648,6 +4648,270 @@ select * from (0 rows) -- +-- test that semi- or inner self-joins on a unique column are removed +-- +-- enable only nestloop to get more predictable plans +set enable_hashjoin to off; +set enable_mergejoin to off; +create table sj (a int unique, b int); +insert into sj values (1, null), (null, 2), (2, 1); +analyze sj; +select p.* from sj p, sj q where q.a = p.a and q.b = q.a - 1; + a | b +---+--- + 2 | 1 +(1 row) + +explain (costs off) +select p.* from sj p, sj q where q.a = p.a and q.b = q.a - 1; + QUERY PLAN +----------------------------------------------- + Seq Scan on sj q + Filter: ((a IS NOT NULL) AND (b = (a - 1))) +(2 rows) + +explain (costs off) +select * from sj p +where exists (select * from sj q + where q.a = p.a and q.b < 10); + QUERY PLAN +------------------------------------------ + Seq Scan on sj q + Filter: ((a IS NOT NULL) AND (b < 10)) +(2 rows) + +-- Double self-join removal. +-- Use a condition on "b + 1", not on "b", for the second join, so that +-- the equivalence class is different from the first one, and we can +-- test the non-ec code path. +explain (costs off) +select * from sj t1 join sj t2 on t1.a = t2.a and t1.b = t2.b + join sj t3 on t2.a = t3.a and t2.b + 1 = t3.b + 1; + QUERY PLAN +--------------------------------------------------------------------------- + Seq Scan on sj t3 + Filter: ((a IS NOT NULL) AND (b IS NOT NULL) AND ((b + 1) IS NOT NULL)) +(2 rows) + +-- subselect that references the removed relation +explain (costs off) +select t1.a, (select a from sj where a = t2.a and a = t1.a) +from sj t1, sj t2 +where t1.a = t2.a; + QUERY PLAN +------------------------------------------ + Seq Scan on sj t2 + Filter: (a IS NOT NULL) + SubPlan 1 + -> Result + One-Time Filter: (t2.a = t2.a) + -> Seq Scan on sj + Filter: (a = t2.a) +(7 rows) + +-- self-join under outer join +explain (costs off) +select * from sj x join sj y on x.a = y.a +left join int8_tbl z on x.a = z.q1; + QUERY PLAN +------------------------------------ + Nested Loop Left Join + Join Filter: (y.a = z.q1) + -> Seq Scan on sj y + Filter: (a IS NOT NULL) + -> Materialize + -> Seq Scan on int8_tbl z +(6 rows) + +explain (costs off) +select * from sj x join sj y on x.a = y.a +left join int8_tbl z on y.a = z.q1; + QUERY PLAN +------------------------------------ + Nested Loop Left Join + Join Filter: (y.a = z.q1) + -> Seq Scan on sj y + Filter: (a IS NOT NULL) + -> Materialize + -> Seq Scan on int8_tbl z +(6 rows) + +-- Test that placeholders are updated correctly after join removal +explain (costs off) +select * from (values (1)) x +left join (select coalesce(y.q1, 1) from int8_tbl y + right join sj j1 inner join sj j2 on j1.a = j2.a + on true) z +on true; + QUERY PLAN +------------------------------------------ + Nested Loop Left Join + -> Result + -> Nested Loop Left Join + -> Seq Scan on sj j2 + Filter: (a IS NOT NULL) + -> Materialize + -> Seq Scan on int8_tbl y +(7 rows) + +-- Test that OR predicated are updated correctly after join removal +CREATE TABLE tab_with_flag ( id INT PRIMARY KEY, is_flag SMALLINT); +CREATE INDEX idx_test_is_flag ON tab_with_flag (is_flag); +explain (costs off) +SELECT COUNT(*) FROM tab_with_flag WHERE (is_flag IS NULL OR is_flag = 0) AND id IN (SELECT id FROM tab_with_flag WHERE id IN (2, 3)); + QUERY PLAN +---------------------------------------------------------------------------------- + Aggregate + -> Bitmap Heap Scan on tab_with_flag + Recheck Cond: ((id = ANY ('{2,3}'::integer[])) AND (id IS NOT NULL)) + Filter: ((is_flag IS NULL) OR (is_flag = 0)) + -> Bitmap Index Scan on tab_with_flag_pkey + Index Cond: ((id = ANY ('{2,3}'::integer[])) AND (id IS NOT NULL)) +(6 rows) + +DROP TABLE tab_with_flag; +-- HAVING clause +explain (costs off) +select p.b from sj p join sj q on p.a = q.a group by p.b having sum(p.a) = 1; + QUERY PLAN +--------------------------------- + HashAggregate + Group Key: q.b + Filter: (sum(q.a) = 1) + -> Seq Scan on sj q + Filter: (a IS NOT NULL) +(5 rows) + +-- update lateral references and range table entry reference +explain (verbose, costs off) +select 1 from (select x.* from sj x, sj y where x.a = y.a) q, + lateral generate_series(1, q.a) gs(i); + QUERY PLAN +------------------------------------------------------ + Nested Loop + Output: 1 + -> Seq Scan on public.sj y + Output: y.a, y.b + Filter: (y.a IS NOT NULL) + -> Function Scan on pg_catalog.generate_series gs + Output: gs.i + Function Call: generate_series(1, y.a) +(8 rows) + +explain (verbose, costs off) +select 1 from (select y.* from sj x, sj y where x.a = y.a) q, + lateral generate_series(1, q.a) gs(i); + QUERY PLAN +------------------------------------------------------ + Nested Loop + Output: 1 + -> Seq Scan on public.sj y + Output: y.a, y.b + Filter: (y.a IS NOT NULL) + -> Function Scan on pg_catalog.generate_series gs + Output: gs.i + Function Call: generate_series(1, y.a) +(8 rows) + +-- Test that a non-EC-derived join clause is processed correctly. Use an +-- outer join so that we can't form an EC. +explain (costs off) select * from sj p join sj q on p.a = q.a + left join sj r on p.a + q.a = r.a; + QUERY PLAN +------------------------------------ + Nested Loop Left Join + Join Filter: ((q.a + q.a) = r.a) + -> Seq Scan on sj q + Filter: (a IS NOT NULL) + -> Materialize + -> Seq Scan on sj r +(6 rows) + +-- FIXME this constant false filter doesn't look good. Should we merge +-- equivalence classes? +explain (costs off) +select * from sj p, sj q where p.a = q.a and p.b = 1 and q.b = 2; + QUERY PLAN +----------------------------------------------------- + Seq Scan on sj q + Filter: ((a IS NOT NULL) AND (b = 2) AND (b = 1)) +(2 rows) + +-- Check that attr_needed is updated correctly after self-join removal. In this +-- test, the join of j1 with j2 is removed. k1.b is required at either j1 or j2. +-- If this info is lost, join targetlist for (k1, k2) will not contain k1.b. +-- Use index scan for k1 so that we don't get 'b' from physical tlist used for +-- seqscan. Also disable reordering of joins because this test depends on a +-- particular join tree. +create table sk (a int, b int); +create index on sk(a); +set join_collapse_limit to 1; +set enable_seqscan to off; +explain (costs off) select 1 from + (sk k1 join sk k2 on k1.a = k2.a) + join (sj j1 join sj j2 on j1.a = j2.a) on j1.b = k1.b; + QUERY PLAN +----------------------------------------------------- + Nested Loop + Join Filter: (k1.b = j2.b) + -> Nested Loop + -> Index Scan using sk_a_idx on sk k1 + -> Index Only Scan using sk_a_idx on sk k2 + Index Cond: (a = k1.a) + -> Materialize + -> Index Scan using sj_a_key on sj j2 + Index Cond: (a IS NOT NULL) +(9 rows) + +explain (costs off) select 1 from + (sk k1 join sk k2 on k1.a = k2.a) + join (sj j1 join sj j2 on j1.a = j2.a) on j2.b = k1.b; + QUERY PLAN +----------------------------------------------------- + Nested Loop + Join Filter: (k1.b = j2.b) + -> Nested Loop + -> Index Scan using sk_a_idx on sk k1 + -> Index Only Scan using sk_a_idx on sk k2 + Index Cond: (a = k1.a) + -> Materialize + -> Index Scan using sj_a_key on sj j2 + Index Cond: (a IS NOT NULL) +(9 rows) + +reset join_collapse_limit; +reset enable_seqscan; +-- +---- It's not clear yet what to do in some of the cases we discussed +---- on the list, so they are disabled for now. +-- +---- We need an index on two columns for the next couple of tests. +--create table sl(a int, b int); +--insert into sl values (1, 1), (1, 2), (2, 1); +--create unique index on sl(a, b); +--vacuum analyze sl; +-- +---- Both sides are unique, but base quals are different +--explain (costs off) +--select * from sl t1, sl t2 +--where t1.a = t2.a and t1.b = 1 and t2.b = 2; +-- +---- Only one side is unqiue +--select * from sl t1, sl t2 where t1.a = t2.a and t1.b = 1; +--select * from sl t1, sl t2 where t1.a = t2.a and t2.b = 1; +-- +---- Several uniques indexes match, and we select a different one +---- for each side, so the join is not removed +--create table sm(a int unique, b int unique, c int unique); +--explain (costs off) +--select * from sm m, sm n where m.a = n.b and m.c = n.c; +--explain (costs off) +--select * from sm m, sm n where m.a = n.c and m.b = n.b; +--explain (costs off) +--select * from sm m, sm n where m.c = n.b and m.a = n.a; +reset enable_hashjoin; +reset enable_mergejoin; +-- -- Test hints given on incorrect column references are useful -- select t1.uunique1 from diff --git a/src/test/regress/expected/updatable_views.out b/src/test/regress/expected/updatable_views.out index 8443c24..6034d93 100644 --- a/src/test/regress/expected/updatable_views.out +++ b/src/test/regress/expected/updatable_views.out @@ -2193,16 +2193,13 @@ SELECT * FROM rw_view1; (1 row) EXPLAIN (costs off) DELETE FROM rw_view1 WHERE id = 1 AND snoop(data); - QUERY PLAN -------------------------------------------------------------------- + QUERY PLAN +-------------------------------------------------- Update on base_tbl base_tbl_1 - -> Nested Loop - -> Index Scan using base_tbl_pkey on base_tbl base_tbl_1 - Index Cond: (id = 1) - -> Index Scan using base_tbl_pkey on base_tbl - Index Cond: (id = 1) - Filter: ((NOT deleted) AND snoop(data)) -(7 rows) + -> Index Scan using base_tbl_pkey on base_tbl + Index Cond: (id = 1) + Filter: ((NOT deleted) AND snoop(data)) +(4 rows) DELETE FROM rw_view1 WHERE id = 1 AND snoop(data); NOTICE: snooped value: Row 1 diff --git a/src/test/regress/sql/equivclass.sql b/src/test/regress/sql/equivclass.sql index 85aa65d..b1e2483 100644 --- a/src/test/regress/sql/equivclass.sql +++ b/src/test/regress/sql/equivclass.sql @@ -262,3 +262,20 @@ explain (costs off) -- this could be converted, but isn't at present explain (costs off) select * from tenk1 where unique1 = unique1 or unique2 = unique2; + + +-- Test that broken ECs are processed correctly during self join removal. +-- Disable merge joins so that we don't get an error about missing commutator. +-- Test both orientations of the join clause, because only one of them breaks +-- the EC. +set enable_mergejoin to off; + +explain (costs off) + select * from ec0 m join ec0 n on m.ff = n.ff + join ec1 p on m.ff + n.ff = p.f1; + +explain (costs off) + select * from ec0 m join ec0 n on m.ff = n.ff + join ec1 p on p.f1::int8 = (m.ff + n.ff)::int8alias1; + +reset enable_mergejoin; diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index 57481d0..8d07615 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -1646,6 +1646,140 @@ select * from int8_tbl x join (int4_tbl x cross join int4_tbl y(ff)) j on q1 = f1; -- ok -- +-- test that semi- or inner self-joins on a unique column are removed +-- + +-- enable only nestloop to get more predictable plans +set enable_hashjoin to off; +set enable_mergejoin to off; + +create table sj (a int unique, b int); +insert into sj values (1, null), (null, 2), (2, 1); +analyze sj; + +select p.* from sj p, sj q where q.a = p.a and q.b = q.a - 1; + +explain (costs off) +select p.* from sj p, sj q where q.a = p.a and q.b = q.a - 1; + +explain (costs off) +select * from sj p +where exists (select * from sj q + where q.a = p.a and q.b < 10); + +-- Double self-join removal. +-- Use a condition on "b + 1", not on "b", for the second join, so that +-- the equivalence class is different from the first one, and we can +-- test the non-ec code path. +explain (costs off) +select * from sj t1 join sj t2 on t1.a = t2.a and t1.b = t2.b + join sj t3 on t2.a = t3.a and t2.b + 1 = t3.b + 1; + +-- subselect that references the removed relation +explain (costs off) +select t1.a, (select a from sj where a = t2.a and a = t1.a) +from sj t1, sj t2 +where t1.a = t2.a; + +-- self-join under outer join +explain (costs off) +select * from sj x join sj y on x.a = y.a +left join int8_tbl z on x.a = z.q1; + +explain (costs off) +select * from sj x join sj y on x.a = y.a +left join int8_tbl z on y.a = z.q1; + +-- Test that placeholders are updated correctly after join removal +explain (costs off) +select * from (values (1)) x +left join (select coalesce(y.q1, 1) from int8_tbl y + right join sj j1 inner join sj j2 on j1.a = j2.a + on true) z +on true; + +-- Test that OR predicated are updated correctly after join removal +CREATE TABLE tab_with_flag ( id INT PRIMARY KEY, is_flag SMALLINT); +CREATE INDEX idx_test_is_flag ON tab_with_flag (is_flag); +explain (costs off) +SELECT COUNT(*) FROM tab_with_flag WHERE (is_flag IS NULL OR is_flag = 0) AND id IN (SELECT id FROM tab_with_flag WHERE id IN (2, 3)); +DROP TABLE tab_with_flag; + +-- HAVING clause +explain (costs off) +select p.b from sj p join sj q on p.a = q.a group by p.b having sum(p.a) = 1; + +-- update lateral references and range table entry reference +explain (verbose, costs off) +select 1 from (select x.* from sj x, sj y where x.a = y.a) q, + lateral generate_series(1, q.a) gs(i); + +explain (verbose, costs off) +select 1 from (select y.* from sj x, sj y where x.a = y.a) q, + lateral generate_series(1, q.a) gs(i); + +-- Test that a non-EC-derived join clause is processed correctly. Use an +-- outer join so that we can't form an EC. +explain (costs off) select * from sj p join sj q on p.a = q.a + left join sj r on p.a + q.a = r.a; + +-- FIXME this constant false filter doesn't look good. Should we merge +-- equivalence classes? +explain (costs off) +select * from sj p, sj q where p.a = q.a and p.b = 1 and q.b = 2; + +-- Check that attr_needed is updated correctly after self-join removal. In this +-- test, the join of j1 with j2 is removed. k1.b is required at either j1 or j2. +-- If this info is lost, join targetlist for (k1, k2) will not contain k1.b. +-- Use index scan for k1 so that we don't get 'b' from physical tlist used for +-- seqscan. Also disable reordering of joins because this test depends on a +-- particular join tree. +create table sk (a int, b int); +create index on sk(a); +set join_collapse_limit to 1; +set enable_seqscan to off; +explain (costs off) select 1 from + (sk k1 join sk k2 on k1.a = k2.a) + join (sj j1 join sj j2 on j1.a = j2.a) on j1.b = k1.b; +explain (costs off) select 1 from + (sk k1 join sk k2 on k1.a = k2.a) + join (sj j1 join sj j2 on j1.a = j2.a) on j2.b = k1.b; +reset join_collapse_limit; +reset enable_seqscan; + +-- +---- It's not clear yet what to do in some of the cases we discussed +---- on the list, so they are disabled for now. +-- +---- We need an index on two columns for the next couple of tests. +--create table sl(a int, b int); +--insert into sl values (1, 1), (1, 2), (2, 1); +--create unique index on sl(a, b); +--vacuum analyze sl; +-- +---- Both sides are unique, but base quals are different +--explain (costs off) +--select * from sl t1, sl t2 +--where t1.a = t2.a and t1.b = 1 and t2.b = 2; +-- +---- Only one side is unqiue +--select * from sl t1, sl t2 where t1.a = t2.a and t1.b = 1; +--select * from sl t1, sl t2 where t1.a = t2.a and t2.b = 1; +-- +---- Several uniques indexes match, and we select a different one +---- for each side, so the join is not removed +--create table sm(a int unique, b int unique, c int unique); +--explain (costs off) +--select * from sm m, sm n where m.a = n.b and m.c = n.c; +--explain (costs off) +--select * from sm m, sm n where m.a = n.c and m.b = n.b; +--explain (costs off) +--select * from sm m, sm n where m.c = n.b and m.a = n.a; + +reset enable_hashjoin; +reset enable_mergejoin; + +-- -- Test hints given on incorrect column references are useful --