On Wed, Mar 7, 2018 at 10:04 AM, Ashutosh Bapat <ashutosh.ba...@enterprisedb.com> wrote: > On Tue, Mar 6, 2018 at 7:52 PM, Jeevan Chalke > <jeevan.cha...@enterprisedb.com> wrote: > >> >> >> Changes look good to me and refactoring will be useful for partitionwise >> patches. >> >> However, will it be good if we add agg_costs into the GroupPathExtraData >> too? >> Also can we pass this to the add_partial_paths_to_grouping_rel() and >> add_paths_to_grouping_rel() to avoid passing can_sort, can_hash and costs >> related details individually to them? > > I think so too.
Here's patch doing that. agg_costs is calculated way before we populate other members of GroupPathExtraData, which means that we either set the pointer to agg_costs in GroupPathExtraData or memcpy its contents. The first option will make GroupPathExtraData asymmetric about the costs it holds, some as pointers and some as whole structure.Holding whole structures allows us to compute those anywhere without worrying about memory allocation or variable life time. So, I am reluctant to make all costs as pointers. So, I have not added agg_costs to GroupPathExtraData yet. We could make GroupPathExtraData as a variable in grouping_planner() and populate its members as we progress. But I think that's digression from the original purpose of the patch. I observe that we are computing agg_costs, number of groups etc. again in postgres_fdw so there seems to be a merit in passing those values as GroupPathExtraData to FDW as well like what you have done with OtherUpperExtraData. But we will come to that once we have straightened the partition-wise aggregate patches. -- Best Wishes, Ashutosh Bapat EnterpriseDB Corporation The Postgres Database Company
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index de1257d..d47dc7e 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -109,6 +109,28 @@ typedef struct int *tleref_to_colnum_map; } grouping_sets_data; +/* + * Struct for extra information passed to create_grouping_paths + * + * can_hash is true if hash-based grouping is possible, false otherwise. + * can_sort is true if sort-based grouping is possible, false otherwise. + * can_partial_agg is true if partial aggregation is possible, false otherwise. + * partial_costs_set indicates whether agg_partial_costs and agg_final_costs + * have valid costs set. Both of those are computed only when partial + * aggregation is required. + * agg_partial_costs gives partial aggregation costs. + * agg_final_costs gives finalization costs. + */ +typedef struct +{ + bool can_hash; + bool can_sort; + bool can_partial_agg; + bool partial_costs_set; + AggClauseCosts agg_partial_costs; + AggClauseCosts agg_final_costs; +} GroupPathExtraData; + /* Local functions */ static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind); static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode); @@ -138,7 +160,8 @@ static RelOptInfo *create_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, PathTarget *target, const AggClauseCosts *agg_costs, - grouping_sets_data *gd); + grouping_sets_data *gd, + GroupPathExtraData *extra); static void consider_groupingsets_paths(PlannerInfo *root, RelOptInfo *grouped_rel, Path *path, @@ -190,18 +213,20 @@ static void add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, PathTarget *target, RelOptInfo *partially_grouped_rel, const AggClauseCosts *agg_costs, - const AggClauseCosts *agg_final_costs, - grouping_sets_data *gd, bool can_sort, bool can_hash, + grouping_sets_data *gd, + GroupPathExtraData *extra, double dNumGroups, List *havingQual); static void add_paths_to_partial_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, RelOptInfo *partially_grouped_rel, - AggClauseCosts *agg_partial_costs, grouping_sets_data *gd, - bool can_sort, - bool can_hash); + GroupPathExtraData *extra); static bool can_parallel_agg(PlannerInfo *root, RelOptInfo *input_rel, - RelOptInfo *grouped_rel, const AggClauseCosts *agg_costs); + RelOptInfo *grouped_rel, GroupPathExtraData *extra); +static void compute_group_path_extra_data(PlannerInfo *root, + GroupPathExtraData *extra, + grouping_sets_data *gd, + const AggClauseCosts *agg_costs); /***************************************************************************** @@ -1981,11 +2006,17 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, */ if (have_grouping) { + GroupPathExtraData group_extra; + + compute_group_path_extra_data(root, &group_extra, gset_data, + &agg_costs); + current_rel = create_grouping_paths(root, current_rel, grouping_target, &agg_costs, - gset_data); + gset_data, + &group_extra); /* Fix things up if grouping_target contains SRFs */ if (parse->hasTargetSRFs) adjust_paths_for_srfs(root, current_rel, @@ -3595,6 +3626,67 @@ estimate_hashagg_tablesize(Path *path, const AggClauseCosts *agg_costs, } /* + * compute_group_path_extra_data + * Compute extra information required for grouping operation specified in + * the query. + */ +static void +compute_group_path_extra_data(PlannerInfo *root, GroupPathExtraData *extra, + grouping_sets_data *gd, + const AggClauseCosts *agg_costs) +{ + Query *parse = root->parse; + + /* + * Determine whether it's possible to perform sort-based implementations + * of grouping. (Note that if groupClause is empty, + * grouping_is_sortable() is trivially true, and all the + * pathkeys_contained_in() tests will succeed too, so that we'll consider + * every surviving input path.) + * + * If we have grouping sets, we might be able to sort some but not all of + * them; in this case, we need can_sort to be true as long as we must + * consider any sorted-input plan. + */ + extra->can_sort = (gd && gd->rollups != NIL) || + grouping_is_sortable(parse->groupClause); + + /* + * Determine whether we should consider hash-based implementations of + * grouping. + * + * Hashed aggregation only applies if we're grouping. If we have grouping + * sets, some groups might be hashable but others not; in this case we set + * can_hash true as long as there is nothing globally preventing us from + * hashing (and we should therefore consider plans with hashes). + * + * Executor doesn't support hashed aggregation with DISTINCT or ORDER BY + * aggregates. (Doing so would imply storing *all* the input values in + * the hash table, and/or running many sorts in parallel, either of which + * seems like a certain loser.) We similarly don't support ordered-set + * aggregates in hashed aggregation, but that case is also included in the + * numOrderedAggs count. + * + * Note: grouping_is_hashable() is much more expensive to check than the + * other gating conditions, so we want to do it last. + */ + extra->can_hash = (parse->groupClause != NIL && + agg_costs->numOrderedAggs == 0 && + (gd ? gd->any_hashable : + grouping_is_hashable(parse->groupClause))); + + /* + * Set partial aggregation costs if we are going to calculate partial + * aggregates in create_grouping_paths(). + */ + extra->partial_costs_set = false; + + /* Is partial aggregation possible? */ + extra->can_partial_agg = (agg_costs->hasNonPartial || + agg_costs->hasNonSerial); +} + +/* * create_grouping_paths * * Build a new upperrel containing Paths for grouping and/or aggregation. @@ -3624,17 +3716,16 @@ create_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, PathTarget *target, const AggClauseCosts *agg_costs, - grouping_sets_data *gd) + grouping_sets_data *gd, + GroupPathExtraData *extra) { Query *parse = root->parse; Path *cheapest_path = input_rel->cheapest_total_path; RelOptInfo *grouped_rel; RelOptInfo *partially_grouped_rel; - AggClauseCosts agg_partial_costs; /* parallel only */ - AggClauseCosts agg_final_costs; /* parallel only */ + AggClauseCosts *agg_partial_costs = &extra->agg_partial_costs; + AggClauseCosts *agg_final_costs = &extra->agg_final_costs; double dNumGroups; - bool can_hash; - bool can_sort; bool try_parallel_aggregation; /* @@ -3750,48 +3841,11 @@ create_grouping_paths(PlannerInfo *root, gd); /* - * Determine whether it's possible to perform sort-based implementations - * of grouping. (Note that if groupClause is empty, - * grouping_is_sortable() is trivially true, and all the - * pathkeys_contained_in() tests will succeed too, so that we'll consider - * every surviving input path.) - * - * If we have grouping sets, we might be able to sort some but not all of - * them; in this case, we need can_sort to be true as long as we must - * consider any sorted-input plan. - */ - can_sort = (gd && gd->rollups != NIL) - || grouping_is_sortable(parse->groupClause); - - /* - * Determine whether we should consider hash-based implementations of - * grouping. - * - * Hashed aggregation only applies if we're grouping. If we have grouping - * sets, some groups might be hashable but others not; in this case we set - * can_hash true as long as there is nothing globally preventing us from - * hashing (and we should therefore consider plans with hashes). - * - * Executor doesn't support hashed aggregation with DISTINCT or ORDER BY - * aggregates. (Doing so would imply storing *all* the input values in - * the hash table, and/or running many sorts in parallel, either of which - * seems like a certain loser.) We similarly don't support ordered-set - * aggregates in hashed aggregation, but that case is also included in the - * numOrderedAggs count. - * - * Note: grouping_is_hashable() is much more expensive to check than the - * other gating conditions, so we want to do it last. - */ - can_hash = (parse->groupClause != NIL && - agg_costs->numOrderedAggs == 0 && - (gd ? gd->any_hashable : grouping_is_hashable(parse->groupClause))); - - /* * Figure out whether a PartialAggregate/Finalize Aggregate execution * strategy is viable. */ try_parallel_aggregation = can_parallel_agg(root, input_rel, grouped_rel, - agg_costs); + extra); /* * Before generating paths for grouped_rel, we first generate any possible @@ -3812,39 +3866,45 @@ create_grouping_paths(PlannerInfo *root, partial_grouping_target = make_partial_grouping_target(root, target); partially_grouped_rel->reltarget = partial_grouping_target; - /* - * Collect statistics about aggregates for estimating costs of - * performing aggregation in parallel. - */ - MemSet(&agg_partial_costs, 0, sizeof(AggClauseCosts)); - MemSet(&agg_final_costs, 0, sizeof(AggClauseCosts)); - if (parse->hasAggs) + /* Set partial aggregation costs, if not already computed. */ + if (!extra->partial_costs_set) { - /* partial phase */ - get_agg_clause_costs(root, (Node *) partial_grouping_target->exprs, - AGGSPLIT_INITIAL_SERIAL, - &agg_partial_costs); - - /* final phase */ - get_agg_clause_costs(root, (Node *) target->exprs, - AGGSPLIT_FINAL_DESERIAL, - &agg_final_costs); - get_agg_clause_costs(root, parse->havingQual, - AGGSPLIT_FINAL_DESERIAL, - &agg_final_costs); + /* + * Collect statistics about aggregates for estimating costs of + * performing aggregation in parallel. + */ + MemSet(agg_partial_costs, 0, sizeof(AggClauseCosts)); + MemSet(agg_final_costs, 0, sizeof(AggClauseCosts)); + if (parse->hasAggs) + { + /* partial phase */ + get_agg_clause_costs(root, + (Node *) partial_grouping_target->exprs, + AGGSPLIT_INITIAL_SERIAL, + agg_partial_costs); + + /* final phase */ + get_agg_clause_costs(root, (Node *) target->exprs, + AGGSPLIT_FINAL_DESERIAL, + agg_final_costs); + get_agg_clause_costs(root, parse->havingQual, + AGGSPLIT_FINAL_DESERIAL, + agg_final_costs); + } + + extra->partial_costs_set = true; } add_paths_to_partial_grouping_rel(root, input_rel, partially_grouped_rel, - &agg_partial_costs, - gd, can_sort, can_hash); + gd, extra); } /* Build final grouping paths */ add_paths_to_grouping_rel(root, input_rel, grouped_rel, target, partially_grouped_rel, agg_costs, - &agg_final_costs, gd, can_sort, can_hash, - dNumGroups, (List *) parse->havingQual); + gd, extra, dNumGroups, + (List *) parse->havingQual); /* Give a helpful error if we failed to find any implementation */ if (grouped_rel->pathlist == NIL) @@ -6006,13 +6066,16 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, PathTarget *target, RelOptInfo *partially_grouped_rel, const AggClauseCosts *agg_costs, - const AggClauseCosts *agg_final_costs, - grouping_sets_data *gd, bool can_sort, bool can_hash, + grouping_sets_data *gd, + GroupPathExtraData *extra, double dNumGroups, List *havingQual) { Query *parse = root->parse; Path *cheapest_path = input_rel->cheapest_total_path; ListCell *lc; + AggClauseCosts *agg_final_costs = &extra->agg_final_costs; + bool can_sort = extra->can_sort; + bool can_hash = extra->can_hash; if (can_sort) { @@ -6219,16 +6282,18 @@ static void add_paths_to_partial_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, RelOptInfo *partially_grouped_rel, - AggClauseCosts *agg_partial_costs, grouping_sets_data *gd, - bool can_sort, - bool can_hash) + GroupPathExtraData *extra) { Query *parse = root->parse; Path *cheapest_partial_path = linitial(input_rel->partial_pathlist); Size hashaggtablesize; double dNumPartialGroups = 0; ListCell *lc; + AggClauseCosts *agg_partial_costs = &extra->agg_partial_costs; + bool can_sort = extra->can_sort; + bool can_hash = extra->can_hash; + /* Estimate number of partial groups. */ dNumPartialGroups = get_number_of_groups(root, @@ -6380,7 +6445,7 @@ add_paths_to_partial_grouping_rel(PlannerInfo *root, */ static bool can_parallel_agg(PlannerInfo *root, RelOptInfo *input_rel, - RelOptInfo *grouped_rel, const AggClauseCosts *agg_costs) + RelOptInfo *grouped_rel, GroupPathExtraData *extra) { Query *parse = root->parse; @@ -6407,7 +6472,7 @@ can_parallel_agg(PlannerInfo *root, RelOptInfo *input_rel, /* We don't know how to do grouping sets in parallel. */ return false; } - else if (agg_costs->hasNonPartial || agg_costs->hasNonSerial) + else if (extra->can_partial_agg) { /* Insufficient support for partial mode. */ return false;