Re: [HACKERS] Re: Improve OR conditions on joined columns (common star schema problem)

Tom Lane Thu, 04 Jan 2018 14:51:25 -0800

I wrote:
> Jim Nasby <[email protected]> writes:
>> I've verified that the patch still applies and make check-world is clean.


> Not any more :-(.  Here's a v3 rebased over HEAD.  No substantive
> change from v2.

Again rebased up to HEAD; still no substantive changes.

                        regards, tom lane

diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index 5e72df1..1644f8b 100644
*** a/src/backend/nodes/outfuncs.c
--- b/src/backend/nodes/outfuncs.c
*************** _outPlannerInfo(StringInfo str, const Pl
*** 2237,2242 ****
--- 2237,2243 ----
  	WRITE_FLOAT_FIELD(tuple_fraction, "%.4f");
  	WRITE_FLOAT_FIELD(limit_tuples, "%.0f");
  	WRITE_UINT_FIELD(qual_security_level);
+ 	WRITE_BOOL_FIELD(needBaseTids);
  	WRITE_BOOL_FIELD(hasInheritedTarget);
  	WRITE_BOOL_FIELD(hasJoinRTEs);
  	WRITE_BOOL_FIELD(hasLateralRTEs);
diff --git a/src/backend/optimizer/plan/Makefile b/src/backend/optimizer/plan/Makefile
index 88a9f7f..1db6bd5 100644
*** a/src/backend/optimizer/plan/Makefile
--- b/src/backend/optimizer/plan/Makefile
*************** top_builddir = ../../../..
*** 13,18 ****
  include $(top_builddir)/src/Makefile.global
  
  OBJS = analyzejoins.o createplan.o initsplan.o planagg.o planmain.o planner.o \
! 	setrefs.o subselect.o
  
  include $(top_srcdir)/src/backend/common.mk
--- 13,18 ----
  include $(top_builddir)/src/Makefile.global
  
  OBJS = analyzejoins.o createplan.o initsplan.o planagg.o planmain.o planner.o \
! 	planunionor.o setrefs.o subselect.o
  
  include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c
index ef25fef..c99b028 100644
*** a/src/backend/optimizer/plan/analyzejoins.c
--- b/src/backend/optimizer/plan/analyzejoins.c
*************** clause_sides_match_join(RestrictInfo *ri
*** 154,159 ****
--- 154,162 ----
   * cases, but we don't have the infrastructure to prove them.)  We also
   * have to check that the inner side doesn't generate any variables needed
   * above the join.
+  *
+  * Note: making this significantly smarter might break planunionor.c.
+  * Study that file before doing so.
   */
  static bool
  join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c
index 7a34abc..cd09aaa 100644
*** a/src/backend/optimizer/plan/planmain.c
--- b/src/backend/optimizer/plan/planmain.c
*************** query_planner(PlannerInfo *root, List *t
*** 206,211 ****
--- 206,219 ----
  	add_placeholders_to_base_rels(root);
  
  	/*
+ 	 * Also, if we have a request to emit baserel CTIDs, add those to the
+ 	 * baserel targetlists now.  This likewise has to be done after join
+ 	 * removal, because we only want CTIDs for rels that survive join removal.
+ 	 */
+ 	if (root->needBaseTids)
+ 		add_base_rel_ctids(root);
+ 
+ 	/*
  	 * Construct the lateral reference sets now that we have finalized
  	 * PlaceHolderVar eval levels.
  	 */
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 7b52dad..3a29d8d 100644
*** a/src/backend/optimizer/plan/planner.c
--- b/src/backend/optimizer/plan/planner.c
*************** subquery_planner(PlannerGlobal *glob, Qu
*** 547,552 ****
--- 547,553 ----
  	root->grouping_map = NULL;
  	root->minmax_aggs = NIL;
  	root->qual_security_level = 0;
+ 	root->needBaseTids = false;
  	root->hasInheritedTarget = false;
  	root->hasRecursion = hasRecursion;
  	if (hasRecursion)
*************** grouping_planner(PlannerInfo *root, bool
*** 1667,1672 ****
--- 1668,1674 ----
  		WindowFuncLists *wflists = NULL;
  		List	   *activeWindows = NIL;
  		grouping_sets_data *gset_data = NULL;
+ 		List	   *union_or_subpaths;
  		standard_qp_extra qp_extra;
  
  		/* A recursive query should always have setOperations */
*************** grouping_planner(PlannerInfo *root, bool
*** 1744,1749 ****
--- 1746,1759 ----
  			preprocess_minmax_aggregates(root, tlist);
  
  		/*
+ 		 * Preprocess join OR clauses that might be better handled as UNIONs.
+ 		 * This likewise needs to be close to the query_planner() call.  But
+ 		 * it doesn't matter which of preprocess_minmax_aggregates() and this
+ 		 * function we call first, because they treat disjoint sets of cases.
+ 		 */
+ 		union_or_subpaths = split_join_or_clauses(root);
+ 
+ 		/*
  		 * Figure out whether there's a hard limit on the number of rows that
  		 * query_planner's result subplan needs to return.  Even if we know a
  		 * hard limit overall, it doesn't apply if the query has any
*************** grouping_planner(PlannerInfo *root, bool
*** 1778,1783 ****
--- 1788,1801 ----
  									standard_qp_callback, &qp_extra);
  
  		/*
+ 		 * If we found any way to convert a join OR clause to a union, finish
+ 		 * up generating the path(s) for that, and add them into the topmost
+ 		 * scan/join relation.
+ 		 */
+ 		if (union_or_subpaths)
+ 			finish_union_or_paths(root, current_rel, union_or_subpaths);
+ 
+ 		/*
  		 * Convert the query's result tlist into PathTarget format.
  		 *
  		 * Note: it's desirable to not do this till after query_planner(),
diff --git a/src/backend/optimizer/plan/planunionor.c b/src/backend/optimizer/plan/planunionor.c
index ...dd11e72 .
*** a/src/backend/optimizer/plan/planunionor.c
--- b/src/backend/optimizer/plan/planunionor.c
***************
*** 0 ****
--- 1,667 ----
+ /*-------------------------------------------------------------------------
+  *
+  * planunionor.c
+  *	  Consider whether join OR clauses can be converted to UNION queries.
+  *
+  * This module looks for OR clauses whose arms each reference a single
+  * query relation (but not all the same rel), and tries to generate a path
+  * representing conversion of such an OR clause into a UNION operation.
+  * For example,
+  *		SELECT ... FROM a, b WHERE (cond-on-A OR cond-on-B) AND other-conds
+  * can be implemented as
+  *		SELECT ... FROM a, b WHERE cond-on-A AND other-conds
+  *		UNION
+  *		SELECT ... FROM a, b WHERE cond-on-B AND other-conds
+  * given a suitable definition of "UNION" (one that won't merge rows that
+  * would have been separate in the original query output).  Since this change
+  * converts join clauses into restriction clauses, the modified query can be
+  * much faster to run than the original, despite the duplication of effort
+  * involved and the extra UNION processing.  It's particularly useful for
+  * star-schema queries where the OR arms reference different dimension tables;
+  * each separated query may be able to remove joins to all but one dimension
+  * table, and arrange that join to use an inner indexscan on the fact table.
+  *
+  * We must insist that the WHERE and JOIN/ON clauses contain no volatile
+  * functions, because of the likelihood that qual clauses will be evaluated
+  * more times than a naive programmer would expect.  We need not restrict
+  * the SELECT's tlist, because that will be evaluated after the UNION.
+  *
+  * The current implementation of the UNION step is to de-duplicate using
+  * row CTIDs.  A big limitation is that this only works on plain relations,
+  * and not for instance on foreign tables.  Another problem is that we can
+  * only de-duplicate by sort/unique, not hashing; but that could be fixed
+  * if we write a hash opclass for TID.
+  *
+  * To allow join removal to happen, we can't reference the CTID column
+  * of an otherwise-removable relation.  Therefore, this code proceeds by
+  * de-duplicating output rows using only the CTIDs of relations that are not
+  * removable in any UNION arm.  It is not immediately obvious that that works
+  * at all, but it does, given one restriction.  If a rel is removed in some
+  * arm, then it is not referenced above the joins in that arm (in particular,
+  * it's not used in that arm's version of the OR clause), and we were able
+  * to prove that removing it doesn't change the output rowcount in that arm.
+  * Therefore there's no need to consider it for de-duplication so far as that
+  * arm's output is concerned.  The identical proof can be expected to apply
+  * in other arms, except in an arm that references that rel in its version
+  * of the OR clause.  But in such an arm, we have effectively added a
+  * restriction clause to what is known in other arms, which means that the
+  * set of rows output by that rel can't increase compared to other arms.
+  * Therefore the situation in such an arm must be that including the rel
+  * could result in either zero or one output row, rather than exactly one
+  * output row as in other arms.  So we still don't need to consider it for
+  * de-duplication.  But there's a hole in this argument, which is that we
+  * must consider the effects of reduce_outer_joins() as well as
+  * remove_useless_joins().  Addition of a restriction clause could result in
+  * simplifying a FULL join into a LEFT join, which might allow join removal
+  * to happen against the right side of that join; but the same proof would
+  * fail in arms that didn't restrict the left side.  We deal with this issue
+  * by not attempting union OR transformation if the query has any FULL joins.
+  *
+  *
+  * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
+  * Portions Copyright (c) 1994, Regents of the University of California
+  *
+  *
+  * IDENTIFICATION
+  *	  src/backend/optimizer/plan/planunionor.c
+  *
+  *-------------------------------------------------------------------------
+  */
+ #include "postgres.h"
+ 
+ #include "access/sysattr.h"
+ #include "catalog/pg_class.h"
+ #include "catalog/pg_operator.h"
+ #include "catalog/pg_type.h"
+ #include "miscadmin.h"
+ #include "nodes/makefuncs.h"
+ #include "optimizer/clauses.h"
+ #include "optimizer/cost.h"
+ #include "optimizer/pathnode.h"
+ #include "optimizer/planmain.h"
+ #include "optimizer/prep.h"
+ #include "optimizer/subselect.h"
+ #include "optimizer/tlist.h"
+ #include "optimizer/var.h"
+ 
+ 
+ static bool is_suitable_join_or_clause(BoolExpr *orclause, List **relids);
+ static bool is_query_safe_for_union_or_transform(PlannerInfo *root);
+ static List *create_union_or_subpaths(PlannerInfo *root, BoolExpr *orclause,
+ 						 int n, List *armrelids);
+ static void union_or_qp_callback(PlannerInfo *root, void *extra);
+ 
+ 
+ /*
+  * split_join_or_clauses - create paths based on splitting join OR clauses
+  *
+  * This should be called by grouping_planner() just before it's ready to call
+  * query_planner(), because we generate simplified join paths by cloning the
+  * planner's state and invoking query_planner() on a modified version of
+  * the query parsetree.  Thus, all preprocessing needed before query_planner()
+  * must already be done.  Note however that we repeat reduce_outer_joins()
+  * because of the possibility that the simplified WHERE clause allows reduction
+  * of an outer join to inner-join form.  That's okay for now, but maybe we
+  * should move the reduce_outer_joins() call into query_planner()?
+  *
+  * The result is a list (one entry per potential union OR path) of sublists of
+  * best paths for the inputs to the UNION step.  Adding the UNION processing
+  * is retty mechanical, but we can't do it until we have a RelOptInfo for the
+  * top-level join rel.
+  */
+ List *
+ split_join_or_clauses(PlannerInfo *root)
+ {
+ 	List	   *results = NIL;
+ 	Query	   *parse = root->parse;
+ 	bool		checked_query = false;
+ 	ListCell   *lc;
+ 	int			n;
+ 
+ 	/*
+ 	 * At least for now, we restrict this optimization to plain SELECTs.
+ 	 */
+ 	if (parse->commandType != CMD_SELECT ||
+ 		parse->rowMarks ||
+ 		parse->setOperations)
+ 		return NIL;
+ 
+ 	/*
+ 	 * Reject if query contains any CTEs; copying them would break
+ 	 * single-evaluation semantics.  (In principle we could arrange for all
+ 	 * UNION arms to read from a single instance of a CTE, but that's an
+ 	 * improvement for another day, especially since we have no way to de-dup
+ 	 * CTE outputs anyway.)
+ 	 */
+ 	if (parse->cteList)
+ 		return NIL;
+ 
+ 	/*
+ 	 * The query must reference multiple tables, else we certainly aren't
+ 	 * going to find any suitable OR clauses.  Do a quick check that there's
+ 	 * more than one RTE.
+ 	 */
+ 	if (list_length(parse->rtable) < 2)
+ 		return NIL;
+ 
+ 	/*
+ 	 * Scan the top-level WHERE clause looking for suitable OR clauses, and
+ 	 * for each one, generate paths for the UNION input sub-queries.  There
+ 	 * might be more than one suitable OR clause, in which case we can try the
+ 	 * transformation for each one of them separately and add that list of
+ 	 * paths to the results.
+ 	 *
+ 	 * XXX we should also search the JOIN/ON clauses of any top-level inner
+ 	 * JOIN nodes, since those are semantically equivalent to WHERE.  But it's
+ 	 * hard to see how to do that without either copying the whole JOIN tree
+ 	 * in advance or repeating the search after copying, and neither of those
+ 	 * options seems attractive.
+ 	 */
+ 	n = 0;
+ 	foreach(lc, (List *) parse->jointree->quals)
+ 	{
+ 		Node	   *qual = (Node *) lfirst(lc);
+ 		List	   *armrelids;
+ 
+ 		if (or_clause(qual) &&
+ 			is_suitable_join_or_clause((BoolExpr *) qual, &armrelids))
+ 		{
+ 			List	   *subpaths;
+ 
+ 			/*
+ 			 * Check that the query as a whole is safe for this optimization.
+ 			 * We only need to do this once, but it's somewhat expensive, so
+ 			 * don't do it till we find a candidate OR clause.
+ 			 */
+ 			if (!checked_query)
+ 			{
+ 				if (!is_query_safe_for_union_or_transform(root))
+ 					return NIL;
+ 				checked_query = true;
+ 			}
+ 			/* OK, transform the query and create a list of sub-paths */
+ 			subpaths = create_union_or_subpaths(root, (BoolExpr *) qual,
+ 												n, armrelids);
+ 			results = lappend(results, subpaths);
+ 		}
+ 		n++;
+ 	}
+ 
+ 	return results;
+ }
+ 
+ /*
+  * Make sure that a UNION subpath will emit the CTID columns for all its
+  * (surviving) baserels.  This is called after we've done join removal in
+  * the UNION arm.
+  */
+ void
+ add_base_rel_ctids(PlannerInfo *root)
+ {
+ 	Relids		allbaserels;
+ 	List	   *vars;
+ 	int			brelid;
+ 
+ 	/* Find out the set of baserels that survived join removal */
+ 	allbaserels = get_relids_in_jointree((Node *) root->parse->jointree, false);
+ 	/* For each such rel, make a Var for its CTID column */
+ 	vars = NIL;
+ 	brelid = -1;
+ 	while ((brelid = bms_next_member(allbaserels, brelid)) >= 0)
+ 	{
+ 		Var		   *var;
+ 
+ 		var = makeVar(brelid,
+ 					  SelfItemPointerAttributeNumber,
+ 					  TIDOID,
+ 					  -1,
+ 					  InvalidOid,
+ 					  0);
+ 		vars = lappend(vars, var);
+ 	}
+ 	/* Add to rel tlists if not present, and mark as needed at top level */
+ 	add_vars_to_targetlist(root, vars, bms_make_singleton(0), false);
+ }
+ 
+ /*
+  * Finish constructing Paths representing the UNION implementation of join
+  * OR clause(s), and attach them to "joinrel", which is the final scan/join
+  * relation returned by query_planner() for the conventional implementation of
+  * the query.  "union_or_subpaths" is the output of split_join_or_clauses().
+  */
+ void
+ finish_union_or_paths(PlannerInfo *root, RelOptInfo *joinrel,
+ 					  List *union_or_subpaths)
+ {
+ 	ListCell   *lc;
+ 
+ 	/* This loop iterates once per splittable OR clause */
+ 	foreach(lc, union_or_subpaths)
+ 	{
+ 		List	   *subpaths = (List *) lfirst(lc);
+ 		List	   *common_exprs;
+ 		PathTarget *common_target;
+ 		Path	   *appendpath;
+ 		List	   *uniq_operators;
+ 		List	   *uniq_exprs;
+ 		UniquePath *pathnode;
+ 		Path		sort_path;	/* dummy for result of cost_sort */
+ 		int			numCols;
+ 		ListCell   *lc2;
+ 
+ 		/*
+ 		 * Join removal in the sub-queries might have resulted in different
+ 		 * sub-paths returning different sets of Vars, in particular we might
+ 		 * not see the full set of artificially-added CTID Vars coming out of
+ 		 * each sub-path.  Fortunately, we only need the ones that are
+ 		 * available from every sub-path.  Since Append can't project, we need
+ 		 * to build a pathtarget containing only the commonly available Vars,
+ 		 * and force each sub-path to return that target.
+ 		 *
+ 		 * This coding assumes that the commonly available Vars will appear in
+ 		 * the same order in each subpath target, which should be true but
+ 		 * it's surely an implementation artifact.  If it stops being true, we
+ 		 * could fall back on list_intersection(), but that'd be O(N^3).
+ 		 */
+ 		common_exprs = (List *)
+ 			copyObject(((Path *) linitial(subpaths))->pathtarget->exprs);
+ 		for_each_cell(lc2, lnext(list_head(subpaths)))
+ 		{
+ 			Path	   *subpath = (Path *) lfirst(lc2);
+ 			ListCell   *lcs;
+ 			ListCell   *lcc;
+ 			ListCell   *prevc;
+ 
+ 			lcs = list_head(subpath->pathtarget->exprs);
+ 			prevc = NULL;
+ 			lcc = list_head(common_exprs);
+ 			while (lcc)
+ 			{
+ 				ListCell   *nextc = lnext(lcc);
+ 
+ 				if (lcs && equal(lfirst(lcs), lfirst(lcc)))
+ 				{
+ 					lcs = lnext(lcs);
+ 					prevc = lcc;
+ 				}
+ 				else
+ 					common_exprs = list_delete_cell(common_exprs, lcc, prevc);
+ 				lcc = nextc;
+ 			}
+ 		}
+ 		common_target = create_empty_pathtarget();
+ 		common_target->exprs = common_exprs;
+ 		set_pathtarget_cost_width(root, common_target);
+ 		/* Now forcibly apply this target to each subpath */
+ 		foreach(lc2, subpaths)
+ 		{
+ 			Path	   *subpath = (Path *) lfirst(lc2);
+ 
+ 			lfirst(lc2) = create_projection_path(root,
+ 												 joinrel,
+ 												 subpath,
+ 												 common_target);
+ 		}
+ 
+ 		/*
+ 		 * Generate Append path combining the sub-paths for this UNION.  The
+ 		 * Append path's pathtarget has to match what is actually coming out
+ 		 * of the subpaths, since Append can't project.
+ 		 */
+ 		appendpath = (Path *) create_append_path(joinrel, subpaths, NIL,
+ 												 NULL, 0, false, NIL, -1);
+ 		appendpath->pathtarget = common_target;
+ 
+ 		/*
+ 		 * Make the operator and expression lists needed for the Unique path.
+ 		 * We need to unique-ify on every CTID that is commonly available from
+ 		 * all the sub-paths.  (See discussion at head of file.)
+ 		 */
+ 		uniq_operators = uniq_exprs = NIL;
+ 		foreach(lc2, common_exprs)
+ 		{
+ 			Var		   *var = (Var *) lfirst(lc2);
+ 
+ 			if (IsA(var, Var) &&
+ 				var->varattno == SelfItemPointerAttributeNumber &&
+ 				var->varlevelsup == 0)
+ 			{
+ 				Assert(var->vartype == TIDOID);
+ 				uniq_operators = lappend_oid(uniq_operators, TIDEqualOperator);
+ 				uniq_exprs = lappend(uniq_exprs, var);
+ 			}
+ 		}
+ 		Assert(uniq_exprs != NIL);
+ 
+ 		/*
+ 		 * Generate a Unique path representing the de-duplication step. For
+ 		 * now, we can only consider sort+unique implementation, since we lack
+ 		 * hashing support for type "tid".
+ 		 *
+ 		 * XXX maybe refactor to share some code with create_unique_path()?
+ 		 */
+ 		pathnode = makeNode(UniquePath);
+ 
+ 		pathnode->path.pathtype = T_Unique;
+ 		pathnode->path.parent = joinrel;
+ 		pathnode->path.pathtarget = appendpath->pathtarget;
+ 		pathnode->path.param_info = appendpath->param_info;
+ 		pathnode->path.parallel_aware = false;
+ 		pathnode->path.parallel_safe = joinrel->consider_parallel &&
+ 			appendpath->parallel_safe;
+ 		pathnode->path.parallel_workers = appendpath->parallel_workers;
+ 
+ 		/*
+ 		 * Treat the output as unsorted, since it almost certainly doesn't
+ 		 * match any useful pathkeys.
+ 		 */
+ 		pathnode->path.pathkeys = NIL;
+ 
+ 		pathnode->subpath = appendpath;
+ 		pathnode->in_operators = uniq_operators;
+ 		pathnode->uniq_exprs = uniq_exprs;
+ 
+ 		/* Estimate number of output rows */
+ 		pathnode->path.rows = appendpath->rows;
+ 		numCols = list_length(uniq_exprs);
+ 
+ 		/*
+ 		 * Estimate cost for sort+unique implementation
+ 		 */
+ 		cost_sort(&sort_path, root, NIL,
+ 				  appendpath->total_cost,
+ 				  appendpath->rows,
+ 				  appendpath->pathtarget->width,
+ 				  0.0,
+ 				  work_mem,
+ 				  -1.0);
+ 
+ 		/*
+ 		 * Charge one cpu_operator_cost per comparison per input tuple.  We
+ 		 * assume all columns get compared at most of the tuples.  (XXX
+ 		 * probably this is an overestimate.)  This should agree with
+ 		 * create_unique_path.
+ 		 */
+ 		sort_path.total_cost += cpu_operator_cost * appendpath->rows * numCols;
+ 
+ 		pathnode->umethod = UNIQUE_PATH_SORT;
+ 
+ 		pathnode->path.startup_cost = sort_path.startup_cost;
+ 		pathnode->path.total_cost = sort_path.total_cost;
+ 
+ 		/* Attach it to the joinrel */
+ 		add_path(joinrel, (Path *) pathnode);
+ 	}
+ 
+ 	/* We need to refigure which is the cheapest path for the joinrel */
+ 	set_cheapest(joinrel);
+ }
+ 
+ /*
+  * Is this OR clause a suitable clause for splitting?
+  *
+  * Each of its arms must reference just one rel, and they must not all be
+  * the same rel.
+  * On success, pass back a list of the relids referenced by each OR arm,
+  * so we don't have to repeat the pull_varnos() work later.
+  */
+ static bool
+ is_suitable_join_or_clause(BoolExpr *orclause, List **relids)
+ {
+ 	bool		ok = false;
+ 	List	   *relidlist = NIL;
+ 	int			firstrelid = 0;
+ 	ListCell   *lc;
+ 
+ 	*relids = NIL;				/* prevent uninitialized-variable warnings */
+ 	foreach(lc, orclause->args)
+ 	{
+ 		Node	   *qual = (Node *) lfirst(lc);
+ 		Relids		varnos = pull_varnos(qual);
+ 		int			relid;
+ 
+ 		if (!bms_get_singleton_member(varnos, &relid))
+ 			return false;		/* this arm fails the sine qua non */
+ 		if (relidlist == NIL)
+ 			firstrelid = relid;
+ 		else if (firstrelid != relid)
+ 			ok = true;			/* arms reference more than one relid */
+ 		relidlist = lappend_int(relidlist, relid);
+ 	}
+ 	*relids = relidlist;
+ 	return ok;
+ }
+ 
+ /*
+  * Is query as a whole safe to apply union OR transformation to?
+  * This checks relatively-expensive conditions that we don't want to
+  * worry about until we've found a candidate OR clause.
+  */
+ static bool
+ is_query_safe_for_union_or_transform(PlannerInfo *root)
+ {
+ 	Query	   *parse = root->parse;
+ 	Relids		allbaserels;
+ 	ListCell   *lc;
+ 	int			relid;
+ 
+ 	/*
+ 	 * Must not have any volatile functions in FROM or WHERE (see notes at
+ 	 * head of file).
+ 	 */
+ 	if (contain_volatile_functions((Node *) parse->jointree))
+ 		return false;
+ 
+ 	/*
+ 	 * We insist that all baserels used in the query be plain relations, so
+ 	 * that we can use their ctids as unique row identifiers in the UNION
+ 	 * step.  One could imagine ways to relax this later, for instance by
+ 	 * forcibly adding WITH ORDINALITY to function RTEs.  We'd have to examine
+ 	 * each RTE anyway, though, to check for volatile functions.
+ 	 */
+ 	allbaserels = get_relids_in_jointree((Node *) parse->jointree, false);
+ 	relid = 0;
+ 	foreach(lc, parse->rtable)
+ 	{
+ 		RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc);
+ 
+ 		relid++;
+ 
+ 		/* fail if query contains any FULL joins (see head of this file) */
+ 		if (rte->rtekind == RTE_JOIN && rte->jointype == JOIN_FULL)
+ 			return false;
+ 
+ 		/* otherwise, ignore RTEs that aren't referenced baserels */
+ 		if (!bms_is_member(relid, allbaserels))
+ 			continue;
+ 
+ 		/* fail if not plain rel */
+ 		if (rte->rtekind != RTE_RELATION)
+ 			return false;
+ 		/* fail if it doesn't have CTIDs */
+ 		if (rte->relkind != RELKIND_RELATION &&
+ 			rte->relkind != RELKIND_MATVIEW)
+ 			return false;
+ 
+ 		/* disallow TABLESAMPLE (might be okay if repeatable?) */
+ 		if (rte->tablesample)
+ 			return false;
+ 
+ 		/* check for volatiles in security barrier quals */
+ 		if (contain_volatile_functions((Node *) rte->securityQuals))
+ 			return false;
+ 	}
+ 
+ 	/* OK to proceed */
+ 	return true;
+ }
+ 
+ /*
+  * Split the query and the given OR clause into one UNION arm per relation
+  * mentioned in the OR clause, and make a list of best paths for the UNION
+  * arms.  (Since the UNION step will lose any ordering or fast-start
+  * properties of the paths, there's no need to consider any but the
+  * cheapest-total path for each arm; hence it's okay to return just one path
+  * per arm.)
+  *
+  * "n" is the OR clause's index in the query's WHERE list.
+  * "armrelids" is the OR-arm-to-referenced-rel mapping.
+  */
+ static List *
+ create_union_or_subpaths(PlannerInfo *root, BoolExpr *orclause,
+ 						 int n, List *armrelids)
+ {
+ 	List	   *subpaths = NIL;
+ 	Relids		orrels;
+ 	int			relid;
+ 	ListCell   *lc;
+ 	ListCell   *lc2;
+ 
+ 	/*
+ 	 * There might be multiple OR arms referring to the same rel, which we
+ 	 * should combine into a restriction OR clause.  So first identify the set
+ 	 * of rels used in the OR.
+ 	 */
+ 	orrels = NULL;
+ 	foreach(lc, armrelids)
+ 		orrels = bms_add_member(orrels, lfirst_int(lc));
+ 
+ 	/* Now, for each such rel, generate a path for a UNION arm */
+ 	while ((relid = bms_first_member(orrels)) >= 0)
+ 	{
+ 		List	   *orarms;
+ 		PlannerInfo *subroot;
+ 		Query	   *parse;
+ 		List	   *subquery_quals;
+ 		bool		hasOuterJoins;
+ 		RelOptInfo *final_rel;
+ 		Path	   *subpath;
+ 		int			k;
+ 		ListCell   *prev;
+ 
+ 		/* Extract the OR arms for this rel, making copies for safety */
+ 		orarms = NIL;
+ 		forboth(lc, orclause->args, lc2, armrelids)
+ 		{
+ 			Node	   *qual = (Node *) lfirst(lc);
+ 			int			qualrelid = lfirst_int(lc2);
+ 
+ 			if (qualrelid == relid)
+ 				orarms = lappend(orarms, copyObject(qual));
+ 		}
+ 		Assert(orarms != NIL);
+ 		if (list_length(orarms) == 1)
+ 		{
+ 			/*
+ 			 * When there's just a single arm for this rel (the typical case),
+ 			 * it goes directly into the subquery's WHERE list.  But it might
+ 			 * be a sub-AND, in which case we must flatten it into a qual list
+ 			 * to preserve AND/OR flatness.
+ 			 */
+ 			orarms = make_ands_implicit((Expr *) linitial(orarms));
+ 		}
+ 		else
+ 		{
+ 			/*
+ 			 * When there's more than one arm, convert back to an OR clause.
+ 			 * No flatness worries here; the arms were already valid OR-list
+ 			 * elements.
+ 			 */
+ 			orarms = list_make1(make_orclause(orarms));
+ 		}
+ 
+ 		/* Clone the planner's state */
+ 		subroot = (PlannerInfo *) palloc(sizeof(PlannerInfo));
+ 		memcpy(subroot, root, sizeof(PlannerInfo));
+ 		subroot->parse = parse = (Query *) copyObject(root->parse);
+ 		/* Making copies of these might be overkill, but be safe */
+ 		subroot->processed_tlist = (List *) copyObject(root->processed_tlist);
+ 		subroot->append_rel_list = (List *) copyObject(root->append_rel_list);
+ 		/* Tell query_planner to expect full retrieval of UNION input */
+ 		subroot->tuple_fraction = 1.0;
+ 		subroot->limit_tuples = -1.0;
+ 
+ 		/*
+ 		 * Remove the subquery's copy of the original OR clause, which we
+ 		 * identify by its index in the WHERE clause list.
+ 		 */
+ 		subquery_quals = (List *) parse->jointree->quals;
+ 		k = 0;
+ 		prev = NULL;
+ 		foreach(lc, subquery_quals)
+ 		{
+ 			if (k == n)
+ 			{
+ 				subquery_quals = list_delete_cell(subquery_quals, lc, prev);
+ 				break;
+ 			}
+ 			k++;
+ 			prev = lc;
+ 		}
+ 
+ 		/* And instead add the qual or quals we extracted from the OR clause */
+ 		subquery_quals = list_concat(subquery_quals, orarms);
+ 		parse->jointree->quals = (Node *) subquery_quals;
+ 
+ 		/*
+ 		 * Ask for baserel CTIDs to be added to the output of the subquery. We
+ 		 * only want CTIDs of rels that will survive join removal, so we can't
+ 		 * add them now, as that would in itself prevent join removal.
+ 		 */
+ 		subroot->needBaseTids = true;
+ 
+ 		/* Re-apply reduce_outer_joins() in case we can now reduce some */
+ 		/* (XXX would be better if this just got moved into query_planner) */
+ 		hasOuterJoins = false;
+ 		foreach(lc, parse->rtable)
+ 		{
+ 			RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc);
+ 
+ 			if (rte->rtekind == RTE_JOIN)
+ 			{
+ 				if (IS_OUTER_JOIN(rte->jointype))
+ 				{
+ 					hasOuterJoins = true;
+ 					break;
+ 				}
+ 			}
+ 		}
+ 		if (hasOuterJoins)
+ 			reduce_outer_joins(subroot);
+ 
+ 		/* Plan the modified query */
+ 		final_rel = query_planner(subroot, subroot->processed_tlist,
+ 								  union_or_qp_callback, NULL);
+ 
+ 		/*
+ 		 * Get the cheapest-total path for the subquery; there's little value
+ 		 * in considering any others.
+ 		 */
+ 		subpath = final_rel->cheapest_total_path;
+ 		Assert(subpath);
+ 
+ 		/* Add cheapest-total path to subpaths list */
+ 		subpaths = lappend(subpaths, subpath);
+ 	}
+ 
+ 	return subpaths;
+ }
+ 
+ /*
+  * Compute query_pathkeys and other pathkeys during plan generation
+  */
+ static void
+ union_or_qp_callback(PlannerInfo *root, void *extra)
+ {
+ 	/*
+ 	 * Since the output of the subquery is going to go through a UNION step
+ 	 * that destroys ordering, there's little need to worry about what its
+ 	 * output order is.  Hence, don't bother telling it about pathkeys that
+ 	 * might apply to these later execution steps.
+ 	 */
+ 	root->group_pathkeys = NIL;
+ 	root->window_pathkeys = NIL;
+ 	root->distinct_pathkeys = NIL;
+ 	root->sort_pathkeys = NIL;
+ 	root->query_pathkeys = NIL;
+ }
diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c
index 0e2a220..8d47f4a 100644
*** a/src/backend/optimizer/prep/prepjointree.c
--- b/src/backend/optimizer/prep/prepjointree.c
*************** pull_up_simple_subquery(PlannerInfo *roo
*** 914,919 ****
--- 914,920 ----
  	subroot->grouping_map = NULL;
  	subroot->minmax_aggs = NIL;
  	subroot->qual_security_level = 0;
+ 	subroot->needBaseTids = false;
  	subroot->hasInheritedTarget = false;
  	subroot->hasRecursion = false;
  	subroot->wt_param_id = -1;
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index 71689b8..5a399da 100644
*** a/src/include/nodes/relation.h
--- b/src/include/nodes/relation.h
*************** typedef struct PlannerInfo
*** 297,302 ****
--- 297,303 ----
  	Index		qual_security_level;	/* minimum security_level for quals */
  	/* Note: qual_security_level is zero if there are no securityQuals */
  
+ 	bool		needBaseTids;	/* true to force outputting baserel CTIDs */
  	bool		hasInheritedTarget; /* true if parse->resultRelation is an
  									 * inheritance child rel */
  	bool		hasJoinRTEs;	/* true if any RTEs are RTE_JOIN kind */
diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h
index 7132c88..cef62cf 100644
*** a/src/include/optimizer/planmain.h
--- b/src/include/optimizer/planmain.h
*************** extern RelOptInfo *query_planner(Planner
*** 46,51 ****
--- 46,59 ----
  extern void preprocess_minmax_aggregates(PlannerInfo *root, List *tlist);
  
  /*
+  * prototypes for plan/planunionor.c
+  */
+ extern List *split_join_or_clauses(PlannerInfo *root);
+ extern void add_base_rel_ctids(PlannerInfo *root);
+ extern void finish_union_or_paths(PlannerInfo *root, RelOptInfo *joinrel,
+ 					  List *union_or_subpaths);
+ 
+ /*
   * prototypes for plan/createplan.c
   */
  extern Plan *create_plan(PlannerInfo *root, Path *best_path);

Re: [HACKERS] Re: Improve OR conditions on joined columns (common star schema problem)

Reply via email to