Cosmetics change: remove find_sort_group_clause_by_sortref() function added in
v5 patch version because it duplicates existsing get_sortgroupref_clause().
--
Teodor Sigaev E-mail: teo...@sigaev.ru
WWW: http://www.sigaev.ru/
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index b22b36ec0e..c073eb061a 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -686,7 +686,18 @@ get_eclass_for_sort_expr(PlannerInfo *root,
if (opcintype == cur_em->em_datatype &&
equal(expr, cur_em->em_expr))
- return cur_ec; /* Match! */
+ {
+ /*
+ * Match!
+ *
+ * Copy sortref if it wasn't set yet, it's possible if ec was
+ * constructed from WHERE clause, ie it doesn't have target
+ * reference at all
+ */
+ if (cur_ec->ec_sortref == 0 && sortref > 0)
+ cur_ec->ec_sortref = sortref;
+ return cur_ec;
+ }
}
}
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index ec66cb9c3c..ca3c78dbbe 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -26,6 +26,7 @@
#include "optimizer/paths.h"
#include "optimizer/tlist.h"
#include "utils/lsyscache.h"
+#include "utils/selfuncs.h"
static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
@@ -327,6 +328,171 @@ pathkeys_contained_in(List *keys1, List *keys2)
return false;
}
+/*
+ * Reorder GROUP BY pathkeys and clauses to match order of pathkeys. Function
+ * returns new lists, original GROUP BY lists stay untouched.
+ */
+int
+group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
+ List **group_clauses)
+{
+ List *new_group_pathkeys= NIL,
+ *new_group_clauses = NIL;
+ ListCell *key;
+ int n;
+
+ if (pathkeys == NIL || *group_pathkeys == NIL)
+ return 0;
+
+ /*
+ * For each pathkey it tries to find corresponding GROUP BY pathkey and
+ * clause.
+ */
+ foreach(key, pathkeys)
+ {
+ PathKey *pathkey = (PathKey *) lfirst(key);
+ SortGroupClause *sgc;
+
+ /*
+ * Pathkey should use the same allocated struct, so, equiality of
+ * pointers is enough
+ */
+ if (!list_member_ptr(*group_pathkeys, pathkey))
+ break;
+
+ new_group_pathkeys = lappend(new_group_pathkeys, pathkey);
+
+ sgc = get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
+ *group_clauses);
+ new_group_clauses = lappend(new_group_clauses, sgc);
+ }
+
+ n = list_length(new_group_pathkeys);
+
+ /*
+ * Just append the rest of pathkeys and clauses
+ */
+ *group_pathkeys = list_concat_unique_ptr(new_group_pathkeys,
+ *group_pathkeys);
+ *group_clauses = list_concat_unique_ptr(new_group_clauses,
+ *group_clauses);
+
+ return n;
+}
+
+/*
+ * Work struct from sorting pathkeys
+ */
+typedef struct PathKeyNumGroup
+{
+ PathKey *key;
+ double numGroups;
+ int order; /* just to make qsort stable */
+} PathKeyNumGroup;
+
+static int
+pathkey_numgroups_cmp(const void *a, const void *b)
+{
+ const PathKeyNumGroup *pka = (const PathKeyNumGroup*)a,
+ *pkb = (const PathKeyNumGroup*)b;
+
+ if (pka->numGroups == pkb->numGroups)
+ /* make qsort stable */
+ return (pka->order > pkb->order) ? 1 : -1;
+ return (pka->numGroups > pkb->numGroups) ? -1 : 1;
+}
+
+/*
+ * Order tail of list of group pathkeys by uniqueness descendetly. It allows to
+ * speedup sorting. Returns newly allocated lists, old ones stay untouched.
+ */
+void
+sort_group_key_by_distinct(PlannerInfo *root, double nrows, List *targetList,
+ List **groupPathkeys, List **groupClauses,
+ int startNum)
+{
+ PathKeyNumGroup *keys;
+ int nkeys = list_length(*groupPathkeys) - startNum;
+ List *pathkeyExprList,
+ *newGroupPathkeys = NIL,
+ *newGroupClauses = NIL;
+ ListCell *cell;
+ int i = 0;
+
+ if (nkeys < 2)
+ return; /* nothing to do */
+
+ keys = palloc(nkeys * sizeof(*keys));
+ pathkeyExprList = list_make1(NULL);
+
+ /*
+ * for each pathkeys which aren't supported underlied index (or something
+ * else) we count a number of group to sort them in descending order
+ */
+ for_each_cell(cell, list_nth_cell(*groupPathkeys, startNum))
+ {
+ PathKey *pathkey = (PathKey *) lfirst(cell);
+ Node *pathkeyExpr;
+ SortGroupClause *sgc;
+
+ sgc = get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
+ *groupClauses);
+ pathkeyExpr = get_sortgroupclause_expr(sgc, targetList);
+ linitial(pathkeyExprList) = pathkeyExpr;
+ keys[i].numGroups= estimate_num_groups(root, pathkeyExprList,
+ nrows, NULL);
+ keys[i].key = pathkey;
+ keys[i].order = i;
+
+ i++;
+ }
+
+ list_free(pathkeyExprList);
+
+ qsort(keys, nkeys, sizeof(*keys), pathkey_numgroups_cmp);
+ i = 0;
+
+ /*
+ * Construct result value
+ */
+ foreach(cell, *groupPathkeys)
+ {
+ PathKey *key;
+ ListCell *gcell;
+
+ if (i < startNum)
+ /* presorted head */
+ key = (PathKey *) lfirst(cell);
+ else
+ /* key, sorted above */
+ key = keys[i - startNum].key;
+
+ newGroupPathkeys = lappend(newGroupPathkeys, key);
+
+ /*
+ * Order GROUP BY clauses the same as path keys
+ * XXX what to do if t's not found?
+ */
+ foreach(gcell, *groupClauses)
+ {
+ SortGroupClause *sgc = (SortGroupClause*) lfirst(gcell);
+
+ if (key->pk_eclass->ec_sortref == sgc->tleSortGroupRef)
+ newGroupClauses = lappend(newGroupClauses, sgc);
+ }
+
+ i++;
+ }
+
+ pfree(keys);
+
+ /* Just append the rest GROUP BY clauses */
+ newGroupClauses = list_concat_unique_ptr(newGroupClauses, *groupClauses);
+
+ *groupPathkeys = newGroupPathkeys;
+ *groupClauses = newGroupClauses;
+}
+
/*
* get_cheapest_path_for_pathkeys
* Find the cheapest path (according to the specified criterion) that
@@ -1611,6 +1777,39 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
return 0; /* path ordering not useful */
}
+/*
+ * pathkeys_useful_for_grouping
+ * Count the number of pathkeys that are useful for grouping (instead of
+ * explicit sort)
+ *
+ * Group pathkeys could be reordered, so we don't bother about actual order in
+ * pathkeys
+ */
+static int
+pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys)
+{
+ ListCell *key;
+ int n = 0;
+
+ if (root->group_pathkeys == NIL)
+ return 0; /* no special ordering requested */
+
+ if (pathkeys == NIL)
+ return 0; /* unordered path */
+
+ foreach(key, pathkeys)
+ {
+ PathKey *pathkey = (PathKey *) lfirst(key);
+
+ if (!list_member_ptr(root->group_pathkeys, pathkey))
+ break;
+
+ n++;
+ }
+
+ return n;
+}
+
/*
* truncate_useless_pathkeys
* Shorten the given pathkey list to just the useful pathkeys.
@@ -1625,6 +1824,9 @@ truncate_useless_pathkeys(PlannerInfo *root,
nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
+ if (nuseful2 > nuseful)
+ nuseful = nuseful2;
+ nuseful2 = pathkeys_useful_for_grouping(root, pathkeys);
if (nuseful2 > nuseful)
nuseful = nuseful2;
@@ -1660,6 +1862,8 @@ has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel)
{
if (rel->joininfo != NIL || rel->has_eclass_joins)
return true; /* might be able to use pathkeys for merging */
+ if (root->group_pathkeys != NIL)
+ return true; /* might be able to use pathkeys for grouping */
if (root->query_pathkeys != NIL)
return true; /* might be able to use them for ordering */
return false; /* definitely useless */
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 67a2c7a581..16d607348a 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -6182,18 +6182,42 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
{
Path *path = (Path *) lfirst(lc);
bool is_sorted;
+ List *group_pathkeys = root->group_pathkeys,
+ *group_clauses = parse->groupClause;
+ int n_preordered_groups;
+
+ if (parse->groupingSets)
+ {
+ is_sorted = pathkeys_contained_in(group_pathkeys,
+ path->pathkeys);
+ }
+ else
+ {
+ n_preordered_groups =
+ group_keys_reorder_by_pathkeys(path->pathkeys,
+ &group_pathkeys,
+ &group_clauses);
+ is_sorted = (n_preordered_groups == list_length(group_pathkeys));
+ }
- is_sorted = pathkeys_contained_in(root->group_pathkeys,
- path->pathkeys);
if (path == cheapest_path || is_sorted)
{
/* Sort the cheapest-total path if it isn't already sorted */
if (!is_sorted)
+ {
+ if (!parse->groupingSets)
+ sort_group_key_by_distinct(root,
+ path->rows,
+ extra->targetList,
+ &group_pathkeys,
+ &group_clauses,
+ n_preordered_groups);
path = (Path *) create_sort_path(root,
grouped_rel,
path,
- root->group_pathkeys,
+ group_pathkeys,
-1.0);
+ }
/* Now decide what to stick atop it */
if (parse->groupingSets)
@@ -6213,9 +6237,9 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
grouped_rel,
path,
grouped_rel->reltarget,
- parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+ group_clauses ? AGG_SORTED : AGG_PLAIN,
AGGSPLIT_SIMPLE,
- parse->groupClause,
+ group_clauses,
havingQual,
agg_costs,
dNumGroups));
@@ -6230,7 +6254,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
create_group_path(root,
grouped_rel,
path,
- parse->groupClause,
+ group_clauses,
havingQual,
dNumGroups));
}
@@ -6251,19 +6275,34 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
foreach(lc, partially_grouped_rel->pathlist)
{
Path *path = (Path *) lfirst(lc);
+ List *group_pathkeys = root->group_pathkeys,
+ *group_clauses = parse->groupClause;
+ bool is_sorted;
+ int n_preordered_groups;
+
+ n_preordered_groups = group_keys_reorder_by_pathkeys(path->pathkeys,
+ &group_pathkeys,
+ &group_clauses);
+ is_sorted = (n_preordered_groups == list_length(group_pathkeys));
/*
* Insert a Sort node, if required. But there's no point in
* sorting anything but the cheapest path.
*/
- if (!pathkeys_contained_in(root->group_pathkeys, path->pathkeys))
+ if (!is_sorted)
{
if (path != partially_grouped_rel->cheapest_total_path)
continue;
+ sort_group_key_by_distinct(root,
+ path->rows,
+ extra->targetList,
+ &group_pathkeys,
+ &group_clauses,
+ n_preordered_groups);
path = (Path *) create_sort_path(root,
grouped_rel,
path,
- root->group_pathkeys,
+ group_pathkeys,
-1.0);
}
@@ -6273,9 +6312,9 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
grouped_rel,
path,
grouped_rel->reltarget,
- parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+ group_clauses ? AGG_SORTED : AGG_PLAIN,
AGGSPLIT_FINAL_DESERIAL,
- parse->groupClause,
+ group_clauses,
havingQual,
agg_final_costs,
dNumGroups));
@@ -6284,7 +6323,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
create_group_path(root,
grouped_rel,
path,
- parse->groupClause,
+ group_clauses,
havingQual,
dNumGroups));
}
@@ -6521,18 +6560,32 @@ create_partial_grouping_paths(PlannerInfo *root,
{
Path *path = (Path *) lfirst(lc);
bool is_sorted;
+ List *group_pathkeys = root->group_pathkeys,
+ *group_clauses = parse->groupClause;
+ int n_preordered_groups;
+
+ n_preordered_groups = group_keys_reorder_by_pathkeys(path->pathkeys,
+ &group_pathkeys,
+ &group_clauses);
+ is_sorted = (n_preordered_groups == list_length(group_pathkeys));
- is_sorted = pathkeys_contained_in(root->group_pathkeys,
- path->pathkeys);
if (path == cheapest_total_path || is_sorted)
{
/* Sort the cheapest partial path, if it isn't already */
if (!is_sorted)
+ {
+ sort_group_key_by_distinct(root,
+ path->rows,
+ extra->targetList,
+ &group_pathkeys,
+ &group_clauses,
+ n_preordered_groups);
path = (Path *) create_sort_path(root,
partially_grouped_rel,
path,
- root->group_pathkeys,
+ group_pathkeys,
-1.0);
+ }
if (parse->hasAggs)
add_path(partially_grouped_rel, (Path *)
@@ -6540,9 +6593,9 @@ create_partial_grouping_paths(PlannerInfo *root,
partially_grouped_rel,
path,
partially_grouped_rel->reltarget,
- parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+ group_clauses ? AGG_SORTED : AGG_PLAIN,
AGGSPLIT_INITIAL_SERIAL,
- parse->groupClause,
+ group_clauses,
NIL,
agg_partial_costs,
dNumPartialGroups));
@@ -6551,7 +6604,7 @@ create_partial_grouping_paths(PlannerInfo *root,
create_group_path(root,
partially_grouped_rel,
path,
- parse->groupClause,
+ group_clauses,
NIL,
dNumPartialGroups));
}
@@ -6565,18 +6618,33 @@ create_partial_grouping_paths(PlannerInfo *root,
{
Path *path = (Path *) lfirst(lc);
bool is_sorted;
+ List *group_pathkeys = root->group_pathkeys,
+ *group_clauses = parse->groupClause;
+ int n_preordered_groups;
+
+ n_preordered_groups = group_keys_reorder_by_pathkeys(path->pathkeys,
+ &group_pathkeys,
+ &group_clauses);
+ is_sorted = (n_preordered_groups == list_length(group_pathkeys));
- is_sorted = pathkeys_contained_in(root->group_pathkeys,
- path->pathkeys);
if (path == cheapest_partial_path || is_sorted)
{
+
/* Sort the cheapest partial path, if it isn't already */
if (!is_sorted)
+ {
+ sort_group_key_by_distinct(root,
+ path->rows,
+ extra->targetList,
+ &group_pathkeys,
+ &group_clauses,
+ n_preordered_groups);
path = (Path *) create_sort_path(root,
partially_grouped_rel,
path,
- root->group_pathkeys,
+ group_pathkeys,
-1.0);
+ }
if (parse->hasAggs)
add_partial_path(partially_grouped_rel, (Path *)
@@ -6584,9 +6652,9 @@ create_partial_grouping_paths(PlannerInfo *root,
partially_grouped_rel,
path,
partially_grouped_rel->reltarget,
- parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+ group_clauses ? AGG_SORTED : AGG_PLAIN,
AGGSPLIT_INITIAL_SERIAL,
- parse->groupClause,
+ group_clauses,
NIL,
agg_partial_costs,
dNumPartialPartialGroups));
@@ -6595,7 +6663,7 @@ create_partial_grouping_paths(PlannerInfo *root,
create_group_path(root,
partially_grouped_rel,
path,
- parse->groupClause,
+ group_clauses,
NIL,
dNumPartialPartialGroups));
}
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index cafde307ad..fb211cc32f 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -190,6 +190,15 @@ typedef enum
extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2);
extern bool pathkeys_contained_in(List *keys1, List *keys2);
+extern int group_keys_reorder_by_pathkeys(List *pathkeys,
+ List **group_pathkeys,
+ List **group_clauses);
+extern void sort_group_key_by_distinct(PlannerInfo *root,
+ double nrows,
+ List *targetList,
+ List **groupPathkeys,
+ List **groupClauses,
+ int startNum);
extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
Relids required_outer,
CostSelector cost_criterion,
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index f85e913850..081ac4ac28 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -2065,3 +2065,152 @@ SELECT balk(hundred) FROM tenk1;
(1 row)
ROLLBACK;
+-- GROUP BY optimization by reorder columns by uniqueness
+SELECT
+ i AS id,
+ i/2 AS p,
+ format('%60s', i%2) AS v,
+ i/4 AS c,
+ i/8 AS d
+ INTO btg
+FROM
+ generate_series(1, 10000) i;
+VACUUM btg;
+ANALYZE btg;
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+ QUERY PLAN
+-----------------------------
+ GroupAggregate
+ Group Key: p, v
+ -> Sort
+ Sort Key: p, v
+ -> Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+ QUERY PLAN
+-----------------------------
+ GroupAggregate
+ Group Key: p, v
+ -> Sort
+ Sort Key: p, v
+ -> Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+ QUERY PLAN
+-----------------------------
+ GroupAggregate
+ Group Key: p, c, v
+ -> Sort
+ Sort Key: p, c, v
+ -> Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c;
+ QUERY PLAN
+------------------------------
+ GroupAggregate
+ Group Key: p, c, d, v
+ -> Sort
+ Sort Key: p, c, d, v
+ -> Seq Scan on btg
+(5 rows)
+
+-- GROUP BY optimization by reorder columns by index scan
+CREATE INDEX ON btg(p, v);
+SET enable_seqscan=off;
+SET enable_bitmapscan=off;
+VACUUM btg;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+ QUERY PLAN
+------------------------------------------------
+ GroupAggregate
+ Group Key: p, v
+ -> Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v;
+ QUERY PLAN
+------------------------------------------------
+ GroupAggregate
+ Group Key: p, v
+ -> Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+ QUERY PLAN
+------------------------------------------------
+ GroupAggregate
+ Group Key: p, v
+ -> Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v;
+ QUERY PLAN
+------------------------------------------------
+ GroupAggregate
+ Group Key: p, v
+ -> Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+ QUERY PLAN
+-------------------------------------------------
+ GroupAggregate
+ Group Key: p, v, c
+ -> Sort
+ Sort Key: p, v, c
+ -> Index Scan using btg_p_v_idx on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v;
+ QUERY PLAN
+-------------------------------------------------
+ GroupAggregate
+ Group Key: p, v, c
+ -> Sort
+ Sort Key: p, v, c
+ -> Index Scan using btg_p_v_idx on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d;
+ QUERY PLAN
+-------------------------------------------------
+ GroupAggregate
+ Group Key: p, v, c, d
+ -> Sort
+ Sort Key: p, v, c, d
+ -> Index Scan using btg_p_v_idx on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v;
+ QUERY PLAN
+-------------------------------------------------
+ GroupAggregate
+ Group Key: p, v, c, d
+ -> Sort
+ Sort Key: p, v, c, d
+ -> Index Scan using btg_p_v_idx on btg
+(5 rows)
+
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
+RESET enable_seqscan;
+RESET enable_bitmapscan;
diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out
index b983f9c506..3915a837f0 100644
--- a/src/test/regress/expected/partition_join.out
+++ b/src/test/regress/expected/partition_join.out
@@ -1140,7 +1140,7 @@ SELECT avg(t1.a), avg(t2.b), avg(t3.a + t3.b), t1.c, t2.c, t3.c FROM plt1 t1, pl
QUERY PLAN
--------------------------------------------------------------------------------
GroupAggregate
- Group Key: t1.c, t2.c, t3.c
+ Group Key: t1.c, t3.c, t2.c
-> Sort
Sort Key: t1.c, t3.c
-> Append
@@ -1284,7 +1284,7 @@ SELECT avg(t1.a), avg(t2.b), avg(t3.a + t3.b), t1.c, t2.c, t3.c FROM pht1 t1, ph
QUERY PLAN
--------------------------------------------------------------------------------
GroupAggregate
- Group Key: t1.c, t2.c, t3.c
+ Group Key: t1.c, t3.c, t2.c
-> Sort
Sort Key: t1.c, t3.c
-> Append
diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out
index 054a381dad..adecec8872 100644
--- a/src/test/regress/expected/stats_ext.out
+++ b/src/test/regress/expected/stats_ext.out
@@ -281,9 +281,9 @@ EXPLAIN (COSTS off)
QUERY PLAN
-----------------------------------
GroupAggregate
- Group Key: a, b
+ Group Key: b, a
-> Sort
- Sort Key: a, b
+ Sort Key: b, a
-> Seq Scan on ndistinct
(5 rows)
@@ -292,9 +292,9 @@ EXPLAIN (COSTS off)
QUERY PLAN
-----------------------------------
GroupAggregate
- Group Key: a, b, c
+ Group Key: b, a, c
-> Sort
- Sort Key: a, b, c
+ Sort Key: b, a, c
-> Seq Scan on ndistinct
(5 rows)
@@ -303,9 +303,9 @@ EXPLAIN (COSTS off)
QUERY PLAN
-----------------------------------
GroupAggregate
- Group Key: a, b, c, d
+ Group Key: d, b, a, c
-> Sort
- Sort Key: a, b, c, d
+ Sort Key: d, b, a, c
-> Seq Scan on ndistinct
(5 rows)
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 506d0442d7..b6b2c5c26e 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -907,3 +907,72 @@ EXPLAIN (COSTS OFF) SELECT balk(hundred) FROM tenk1;
SELECT balk(hundred) FROM tenk1;
ROLLBACK;
+
+-- GROUP BY optimization by reorder columns by uniqueness
+
+SELECT
+ i AS id,
+ i/2 AS p,
+ format('%60s', i%2) AS v,
+ i/4 AS c,
+ i/8 AS d
+ INTO btg
+FROM
+ generate_series(1, 10000) i;
+
+VACUUM btg;
+ANALYZE btg;
+
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c;
+
+-- GROUP BY optimization by reorder columns by index scan
+
+CREATE INDEX ON btg(p, v);
+SET enable_seqscan=off;
+SET enable_bitmapscan=off;
+VACUUM btg;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v;
+
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
+RESET enable_seqscan;
+RESET enable_bitmapscan;
+