On Fri, Jun 17, 2016 at 9:29 AM, Robert Haas <robertmh...@gmail.com> wrote: > On Fri, Jun 17, 2016 at 9:04 AM, Amit Kapila <amit.kapil...@gmail.com> wrote: >> Attached, please find updated patch based on above lines. Due to addition >> of projection path on top of partial paths, some small additional cost is >> added for parallel paths. In one of the tests in select_parallel.sql the >> plan was getting converted to serial plan from parallel plan due to this >> cost, so I have changed it to make it more restrictive. Before changing, I >> have debugged the test to confirm that it was due to this new projection >> path cost. I have added two new tests as well to cover the new code added >> by patch. > > I'm going to go work on this patch now.
Here is a revised version, which I plan to commit in a few hours before the workday expires. I kept it basically as Amit had it, but I whacked the comments around some and, more substantively, avoided inserting and/or charging for a Result node when that's not necessary. -- Robert Haas EnterpriseDB: http://www.enterprisedb.com The Enterprise PostgreSQL Company
diff --git a/src/backend/optimizer/plan/planagg.c b/src/backend/optimizer/plan/planagg.c index cefec7b..0434a5a 100644 --- a/src/backend/optimizer/plan/planagg.c +++ b/src/backend/optimizer/plan/planagg.c @@ -465,7 +465,8 @@ build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo, * cheapest path.) */ sorted_path = apply_projection_to_path(subroot, final_rel, sorted_path, - create_pathtarget(subroot, tlist)); + create_pathtarget(subroot, tlist), + false); /* * Determine cost to get just the first row of the presorted path. diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 07b925e..0af8151 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -1500,6 +1500,7 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, PathTarget *grouping_target; PathTarget *scanjoin_target; bool have_grouping; + bool scanjoin_target_parallel_safe = false; WindowFuncLists *wflists = NULL; List *activeWindows = NIL; List *rollup_lists = NIL; @@ -1730,7 +1731,16 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, scanjoin_target = grouping_target; /* - * Forcibly apply that target to all the Paths for the scan/join rel. + * Check whether scan/join target is parallel safe ... unless there + * are no partial paths, in which case we don't care. + */ + if (current_rel->partial_pathlist && + !has_parallel_hazard((Node *) scanjoin_target->exprs, false)) + scanjoin_target_parallel_safe = true; + + /* + * Forcibly apply scan/join target to all the Paths for the scan/join + * rel. * * In principle we should re-run set_cheapest() here to identify the * cheapest path, but it seems unlikely that adding the same tlist @@ -1746,7 +1756,8 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, Assert(subpath->param_info == NULL); path = apply_projection_to_path(root, current_rel, - subpath, scanjoin_target); + subpath, scanjoin_target, + scanjoin_target_parallel_safe); /* If we had to add a Result, path is different from subpath */ if (path != subpath) { @@ -1759,6 +1770,70 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, } /* + * Upper planning steps which make use of the top scan/join rel's + * partial pathlist will expect partial paths for that rel to produce + * the same output as complete paths ... and we just changed the + * output for the complete paths, so we'll need to do the same thing + * for partial paths. + */ + if (scanjoin_target_parallel_safe) + { + /* + * Apply the scan/join target to each partial path. Otherwise, + * anything that attempts to use the partial paths for further + * upper planning may go wrong. + */ + foreach(lc, current_rel->partial_pathlist) + { + Path *subpath = (Path *) lfirst(lc); + Path *newpath; + + /* + * We can't use apply_projection_to_path() here, because there + * could already be pointers to these paths, and therefore + * we cannot modify them in place. Instead, we must use + * create_projection_path(). The good news is this won't + * actually insert a Result node into the final plan unless + * it's needed, but the bad news is that it will charge for + * the node whether it's needed or not. Therefore, if the + * target list is already what we need it to be, just leave + * this partial path alone. + */ + if (equal(scanjoin_target->exprs, subpath->pathtarget->exprs)) + continue; + + Assert(subpath->param_info == NULL); + newpath = (Path *) create_projection_path(root, + current_rel, + subpath, + scanjoin_target); + if (is_projection_capable_path(subpath)) + { + /* + * Since the target lists differ, a projection path is + * essential, but it will disappear at plan creation time + * because the subpath is projection-capable. So avoid + * charging anything for the disappearing node. + */ + newpath->startup_cost = subpath->startup_cost; + newpath->total_cost = subpath->total_cost; + } + + lfirst(lc) = newpath; + } + } + else + { + /* + * In the unfortunate event that scanjoin_target is not + * parallel-safe, we can't apply it to the partial paths; in + * that case, we'll need to forget about the partial paths, which + * aren't valid input for upper planning steps. + */ + current_rel->partial_pathlist = NIL; + } + + /* * Save the various upper-rel PathTargets we just computed into * root->upper_targets[]. The core code doesn't use this, but it * provides a convenient place for extensions to get at the info. For @@ -4153,7 +4228,7 @@ create_ordered_paths(PlannerInfo *root, /* Add projection step if needed */ if (path->pathtarget != target) path = apply_projection_to_path(root, ordered_rel, - path, target); + path, target, false); add_path(ordered_rel, path); } diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c index 552b756..30975e0 100644 --- a/src/backend/optimizer/prep/prepunion.c +++ b/src/backend/optimizer/prep/prepunion.c @@ -325,7 +325,8 @@ recurse_set_operations(Node *setOp, PlannerInfo *root, refnames_tlist); path = apply_projection_to_path(root, rel, path, - create_pathtarget(root, tlist)); + create_pathtarget(root, tlist), + false); /* Return the fully-fledged tlist to caller, too */ *pTargetList = tlist; @@ -394,7 +395,8 @@ recurse_set_operations(Node *setOp, PlannerInfo *root, path->parent, path, create_pathtarget(root, - *pTargetList)); + *pTargetList), + false); } return path; } diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 6b57de3..5b66ff5 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -2217,12 +2217,14 @@ create_projection_path(PlannerInfo *root, * 'rel' is the parent relation associated with the result * 'path' is the path representing the source of data * 'target' is the PathTarget to be computed + * 'target_parallel' indicates that target expressions are all parallel-safe */ Path * apply_projection_to_path(PlannerInfo *root, RelOptInfo *rel, Path *path, - PathTarget *target) + PathTarget *target, + bool target_parallel) { QualCost oldcost; @@ -2248,8 +2250,7 @@ apply_projection_to_path(PlannerInfo *root, * project. But if there is something that is not parallel-safe in the * target expressions, then we can't. */ - if (IsA(path, GatherPath) && - !has_parallel_hazard((Node *) target->exprs, false)) + if (IsA(path, GatherPath) && target_parallel) { GatherPath *gpath = (GatherPath *) path; diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index 5de4c34..586ecdd 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -143,7 +143,8 @@ extern ProjectionPath *create_projection_path(PlannerInfo *root, extern Path *apply_projection_to_path(PlannerInfo *root, RelOptInfo *rel, Path *path, - PathTarget *target); + PathTarget *target, + bool target_parallel); extern SortPath *create_sort_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, diff --git a/src/test/regress/expected/select_parallel.out b/src/test/regress/expected/select_parallel.out index 6f1f247..3c90640 100644 --- a/src/test/regress/expected/select_parallel.out +++ b/src/test/regress/expected/select_parallel.out @@ -51,6 +51,38 @@ select parallel_restricted(unique1) from tenk1 Filter: (tenk1.stringu1 = 'GRAAAA'::name) (9 rows) +-- test parallel plan when group by expression is in target list. +explain (costs off) + select length(stringu1) from tenk1 group by length(stringu1); + QUERY PLAN +--------------------------------------------------- + Finalize HashAggregate + Group Key: (length((stringu1)::text)) + -> Gather + Workers Planned: 4 + -> Partial HashAggregate + Group Key: length((stringu1)::text) + -> Parallel Seq Scan on tenk1 +(7 rows) + +select length(stringu1) from tenk1 group by length(stringu1); + length +-------- + 6 +(1 row) + +-- test that parallel plan for aggregates is not selected when +-- target list contains parallel restricted clause. +explain (costs off) + select sum(parallel_restricted(unique1)) from tenk1 + group by(parallel_restricted(unique1)); + QUERY PLAN +---------------------------------------------------- + HashAggregate + Group Key: parallel_restricted(unique1) + -> Index Only Scan using tenk1_unique1 on tenk1 +(3 rows) + set force_parallel_mode=1; explain (costs off) select stringu1::int2 from tenk1 where unique1 = 1; diff --git a/src/test/regress/sql/select_parallel.sql b/src/test/regress/sql/select_parallel.sql index 7b607c2..a8cd993 100644 --- a/src/test/regress/sql/select_parallel.sql +++ b/src/test/regress/sql/select_parallel.sql @@ -24,6 +24,17 @@ explain (verbose, costs off) select parallel_restricted(unique1) from tenk1 where stringu1 = 'GRAAAA' order by 1; +-- test parallel plan when group by expression is in target list. +explain (costs off) + select length(stringu1) from tenk1 group by length(stringu1); +select length(stringu1) from tenk1 group by length(stringu1); + +-- test that parallel plan for aggregates is not selected when +-- target list contains parallel restricted clause. +explain (costs off) + select sum(parallel_restricted(unique1)) from tenk1 + group by(parallel_restricted(unique1)); + set force_parallel_mode=1; explain (costs off)
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers