Hello,
following this short discussion
http://archives.postgresql.org/message-id/4f5aa202.9020...@gmail.com
I gave it one more try and hacked the optimizer so that function can
become an inner relation in NL join, parametrized with values from the
outer relation.
I tried to explain my thoughts in comments. Other than that:
1. I haven't tried to use SpecialJoinInfo to constrain join order. Even
if the order matters in query like
SELECT * from a, func(a.i)
it's still inner join by nature. SpecialJoinInfo is used for INNER join
rarely, but never stored in PlannerInfo. Doing so only for these lateral
functions would be rather disruptive.
2. Simple SQL function (i.e. one that gets pulled-up into the main
query) is a special case. The query that results from such a pull-up no
longer contains any function (and thus is not affected by this patch)
but such cases need to be newly taken into account and examined / tested
(the patch unblocks them at parsing stage too).
3. There might be some open questions about SQL conformance.
I've spent quite a while looking into the optimizer code and after all I
was surprised that it didn't require that many changes. At least to make
few simple examples work. Do I ignore any important fact(s) ?
Thanks,
Tony.
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index 446319d..f30ae4e 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -85,6 +85,44 @@ add_paths_to_joinrel(PlannerInfo *root,
SemiAntiJoinFactors semifactors;
Relids param_source_rels = NULL;
ListCell *lc;
+ bool nest_loop_only = false;
+
+ /*
+ * Should the outer path be parametrized by the inner, there's no method
+ * to evaluate such a join.
+ */
+ if (bms_overlap(outerrel->func_arg_relids, innerrel->relids)) {
+ return;
+ }
+
+ /*
+ * Only nest-loop join is considered viable method for lateral function.
+ *
+ * Non-empty 'innerrel->func_arg_relids' isn't a sufficient condition for
+ * NL join with parametrized function on the inner side:
+ *
+ * If there's no overlap, the function arguments have already been used
+ * to construct the innerrel (no reason to use them again) or they are supplied
+ * from higher level (and thus constant for the current join) .
+ */
+ if (bms_overlap(innerrel->func_arg_relids, outerrel->relids)) {
+ /*
+ * Merge join is the only method to evaluate full join, but merge join is a bad option
+ * for lateral functions.
+ */
+ if (jointype == JOIN_FULL) {
+ return;
+ }
+
+ /*
+ * TODO
+ * Probably not relevant. Verify.
+ */
+ if (jointype == JOIN_SEMI || jointype == JOIN_ANTI) {
+ return;
+ }
+ nest_loop_only = true;
+ }
/*
* Find potential mergejoin clauses. We can skip this if we are not
@@ -92,7 +130,7 @@ add_paths_to_joinrel(PlannerInfo *root,
* way of implementing a full outer join, so override enable_mergejoin if
* it's a full join.
*/
- if (enable_mergejoin || jointype == JOIN_FULL)
+ if ((enable_mergejoin || jointype == JOIN_FULL) && !nest_loop_only)
mergeclause_list = select_mergejoin_clauses(root,
joinrel,
outerrel,
@@ -151,7 +189,7 @@ add_paths_to_joinrel(PlannerInfo *root,
* 1. Consider mergejoin paths where both relations must be explicitly
* sorted. Skip this if we can't mergejoin.
*/
- if (mergejoin_allowed)
+ if (mergejoin_allowed && !nest_loop_only)
sort_inner_and_outer(root, joinrel, outerrel, innerrel,
restrictlist, mergeclause_list, jointype,
sjinfo, param_source_rels);
@@ -192,10 +230,13 @@ add_paths_to_joinrel(PlannerInfo *root,
* before being joined. As above, disregard enable_hashjoin for full
* joins, because there may be no other alternative.
*/
- if (enable_hashjoin || jointype == JOIN_FULL)
+ if ((enable_hashjoin || jointype == JOIN_FULL) && !nest_loop_only)
hash_inner_and_outer(root, joinrel, outerrel, innerrel,
restrictlist, jointype,
sjinfo, &semifactors, param_source_rels);
+
+ /* Prepare for check of function parametrization at the next higher level. */
+ joinrel->func_arg_relids = bms_union(outerrel->func_arg_relids, innerrel->func_arg_relids);
}
/*
@@ -654,6 +695,7 @@ match_unsorted_outer(PlannerInfo *root,
Path *inner_cheapest_total = innerrel->cheapest_total_path;
Path *matpath = NULL;
ListCell *lc1;
+ bool nest_loop_only = bms_overlap(innerrel->func_arg_relids, outerrel->relids);
/*
* Nestloop only supports inner, left, semi, and anti joins. Also, if we
@@ -814,6 +856,9 @@ match_unsorted_outer(PlannerInfo *root,
if (save_jointype == JOIN_UNIQUE_OUTER)
continue;
+ if (nest_loop_only)
+ continue;
+
/* Look for useful mergeclauses (if any) */
mergeclauses = find_mergeclauses_for_pathkeys(root,
outerpath->pathkeys,
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index c34b9b8..5552fa8 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -583,8 +583,10 @@ create_join_plan(PlannerInfo *root, JoinPath *best_path)
/* For a nestloop, include outer relids in curOuterRels for inner side */
if (best_path->path.pathtype == T_NestLoop)
+ {
root->curOuterRels = bms_union(root->curOuterRels,
best_path->outerjoinpath->parent->relids);
+ }
inner_plan = create_plan_recurse(root, best_path->innerjoinpath);
@@ -1663,18 +1665,32 @@ create_functionscan_plan(PlannerInfo *root, Path *best_path,
FunctionScan *scan_plan;
Index scan_relid = best_path->parent->relid;
RangeTblEntry *rte;
+ FuncExpr *funcExpr;
/* it should be a function base rel... */
Assert(scan_relid > 0);
rte = planner_rt_fetch(scan_relid, root);
Assert(rte->rtekind == RTE_FUNCTION);
+ /*
+ * Ensure that vars in argument expressions get substituted in case the function participates in a join.
+ *
+ * (Function can only be used as inner relation of nest-loop join.)
+ */
+ funcExpr = (FuncExpr *) rte->funcexpr;
+ if (funcExpr->args != NULL) {
+ funcExpr->args = (List *)
+ replace_nestloop_params(root, (Node *) funcExpr->args);
+ }
+
/* Sort clauses into best execution order */
scan_clauses = order_qual_clauses(root, scan_clauses);
/* Reduce RestrictInfo list to bare expressions; ignore pseudoconstants */
scan_clauses = extract_actual_clauses(scan_clauses, false);
+
+
scan_plan = make_functionscan(tlist, scan_clauses, scan_relid,
rte->funcexpr,
rte->eref->colnames,
diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c
index bfdd9ff..427e7e9 100644
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -20,6 +20,7 @@
#include "optimizer/placeholder.h"
#include "optimizer/plancat.h"
#include "optimizer/restrictinfo.h"
+#include "optimizer/var.h"
#include "utils/hsearch.h"
@@ -97,6 +98,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptKind reloptkind)
rel = makeNode(RelOptInfo);
rel->reloptkind = reloptkind;
rel->relids = bms_make_singleton(relid);
+ rel->func_arg_relids = NULL;
rel->rows = 0;
rel->width = 0;
rel->reltargetlist = NIL;
@@ -154,6 +156,19 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptKind reloptkind)
break;
}
+ /*
+ * If function arguments reference other relations, we need to keep track
+ * in order to check for any join it's going to participate in whether parametrization
+ * is possible.
+ */
+ if (rte->rtekind == RTE_FUNCTION) {
+ FuncExpr *funcExpr = (FuncExpr *) rte->funcexpr;
+
+ if (funcExpr->args != NULL) {
+ rel->func_arg_relids = pull_varnos((Node *) funcExpr->args);
+ }
+ }
+
/* Save the finished struct in the query's simple_rel_array */
root->simple_rel_array[relid] = rel;
diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c
index 97ab9d5..6178b17 100644
--- a/src/backend/parser/parse_clause.c
+++ b/src/backend/parser/parse_clause.c
@@ -557,25 +557,6 @@ transformRangeFunction(ParseState *pstate, RangeFunction *r)
assign_expr_collations(pstate, funcexpr);
/*
- * The function parameters cannot make use of any variables from other
- * FROM items. (Compare to transformRangeSubselect(); the coding is
- * different though because we didn't parse as a sub-select with its own
- * level of namespace.)
- *
- * XXX this will need further work to support SQL99's LATERAL() feature,
- * wherein such references would indeed be legal.
- */
- if (pstate->p_relnamespace || pstate->p_varnamespace)
- {
- if (contain_vars_of_level(funcexpr, 0))
- ereport(ERROR,
- (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
- errmsg("function expression in FROM cannot refer to other relations of same query level"),
- parser_errposition(pstate,
- locate_var_of_level(funcexpr, 0))));
- }
-
- /*
* Disallow aggregate functions in the expression. (No reason to postpone
* this check until parseCheckAggregates.)
*/
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index e1d5fc0..c3f3394 100644
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -292,6 +292,10 @@ typedef struct PlannerInfo
*
* relids - Set of base-relation identifiers; it is a base relation
* if there is just one, a join relation if more than one
+ * func_arg_relids - union of relations that all functions within the current relation
+ * reference via function arglists. Unlike clause parameters, this set is only
+ * determined by the functions contained in 'relids'. That's why it's not stored
+ * in 'ppilist' for each specific path.
* rows - estimated number of tuples in the relation after restriction
* clauses have been applied (ie, output rows of a plan for it)
* width - avg. number of bytes per tuple in the relation after the
@@ -394,6 +398,9 @@ typedef struct RelOptInfo
/* all relations included in this RelOptInfo */
Relids relids; /* set of base relids (rangetable indexes) */
+ /* union of relids used as parameters by all RTE functions that this relation contains */
+ Relids func_arg_relids;
+
/* size estimates generated by planner */
double rows; /* estimated number of result tuples */
int width; /* estimated avg width of result tuples */
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers