Re: POC: GROUP BY optimization

Teodor Sigaev Wed, 30 May 2018 09:18:55 -0700

Cosmetics change: remove find_sort_group_clause_by_sortref() function added inv5 patch version because it duplicates existsing get_sortgroupref_clause().


--
Teodor Sigaev                                   E-mail: [email protected]
                                                   WWW: http://www.sigaev.ru/

diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index b22b36ec0e..c073eb061a 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -686,7 +686,18 @@ get_eclass_for_sort_expr(PlannerInfo *root,
 
 			if (opcintype == cur_em->em_datatype &&
 				equal(expr, cur_em->em_expr))
-				return cur_ec;	/* Match! */
+			{
+				/*
+				 * Match!
+				 *
+				 * Copy sortref if it wasn't set yet, it's possible if ec was
+				 * constructed from WHERE clause, ie it doesn't have target
+				 * reference at all
+				 */
+				if (cur_ec->ec_sortref == 0 && sortref > 0)
+					cur_ec->ec_sortref = sortref;
+				return cur_ec;
+			}
 		}
 	}
 
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index ec66cb9c3c..ca3c78dbbe 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -26,6 +26,7 @@
 #include "optimizer/paths.h"
 #include "optimizer/tlist.h"
 #include "utils/lsyscache.h"
+#include "utils/selfuncs.h"
 
 
 static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
@@ -327,6 +328,171 @@ pathkeys_contained_in(List *keys1, List *keys2)
 	return false;
 }
 
+/*
+ * Reorder GROUP BY pathkeys and clauses to match order of pathkeys. Function
+ * returns new lists,  original GROUP BY lists stay untouched.
+ */
+int
+group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
+							   List **group_clauses)
+{
+	List		*new_group_pathkeys= NIL,
+				*new_group_clauses = NIL;
+	ListCell	*key;
+	int			n;
+
+	if (pathkeys == NIL || *group_pathkeys == NIL)
+		return 0;
+
+	/*
+	 * For each pathkey it tries to find corresponding GROUP BY pathkey and
+	 * clause.
+	 */
+	foreach(key, pathkeys)
+	{
+		PathKey			*pathkey = (PathKey *) lfirst(key);
+		SortGroupClause	*sgc;
+
+		/*
+		 * Pathkey should use the same allocated struct, so, equiality of
+		 * pointers is enough
+		 */
+		if (!list_member_ptr(*group_pathkeys, pathkey))
+			break;
+
+		new_group_pathkeys = lappend(new_group_pathkeys, pathkey);
+
+		sgc = get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
+									  *group_clauses);
+		new_group_clauses = lappend(new_group_clauses, sgc);
+	}
+
+	n = list_length(new_group_pathkeys);
+
+	/*
+	 * Just append the rest of pathkeys and clauses
+	 */
+	*group_pathkeys = list_concat_unique_ptr(new_group_pathkeys,
+												 *group_pathkeys);
+	*group_clauses = list_concat_unique_ptr(new_group_clauses,
+											*group_clauses);
+
+	return n;
+}
+
+/*
+ * Work struct from sorting pathkeys
+ */
+typedef struct PathKeyNumGroup
+{
+	PathKey	   *key;
+	double		numGroups;
+	int			order; /* just to make qsort stable */
+} PathKeyNumGroup;
+
+static int
+pathkey_numgroups_cmp(const void *a, const void *b)
+{
+	const PathKeyNumGroup *pka = (const PathKeyNumGroup*)a,
+						  *pkb = (const PathKeyNumGroup*)b;
+
+	if (pka->numGroups == pkb->numGroups)
+		/* make qsort stable */
+		return (pka->order > pkb->order) ? 1 : -1;
+	return (pka->numGroups > pkb->numGroups) ? -1 : 1;
+}
+
+/*
+ * Order tail of list of group pathkeys by uniqueness descendetly. It allows to
+ * speedup sorting. Returns newly allocated lists, old ones stay untouched.
+ */
+void
+sort_group_key_by_distinct(PlannerInfo *root, double nrows, List *targetList,
+						   List **groupPathkeys, List **groupClauses,
+						   int startNum)
+{
+	PathKeyNumGroup	   *keys;
+	int					nkeys = list_length(*groupPathkeys) - startNum;
+	List			   *pathkeyExprList,
+					   *newGroupPathkeys = NIL,
+					   *newGroupClauses = NIL;
+	ListCell		   *cell;
+	int					i = 0;
+
+	if (nkeys < 2)
+		return; /* nothing to do */
+
+	keys = palloc(nkeys * sizeof(*keys));
+	pathkeyExprList = list_make1(NULL);
+
+	/*
+	 * for each pathkeys which aren't supported underlied index (or something
+	 * else) we count a number of group to sort them in descending order
+	 */
+	for_each_cell(cell, list_nth_cell(*groupPathkeys, startNum))
+	{
+		PathKey			*pathkey = (PathKey *) lfirst(cell);
+		Node			*pathkeyExpr;
+		SortGroupClause	*sgc;
+
+		sgc = get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
+									  *groupClauses);
+		pathkeyExpr = get_sortgroupclause_expr(sgc, targetList);
+		linitial(pathkeyExprList) = pathkeyExpr;
+		keys[i].numGroups= estimate_num_groups(root, pathkeyExprList,
+											   nrows, NULL);
+		keys[i].key = pathkey;
+		keys[i].order = i;
+
+		i++;
+	}
+
+	list_free(pathkeyExprList);
+
+	qsort(keys, nkeys, sizeof(*keys), pathkey_numgroups_cmp);
+	i = 0;
+
+	/*
+	 * Construct result value
+	 */
+	foreach(cell, *groupPathkeys)
+	{
+		PathKey	   *key;
+		ListCell   *gcell;
+
+		if (i < startNum)
+			/* presorted head */
+			key = (PathKey *) lfirst(cell);
+		else
+			/* key, sorted above */
+			key = keys[i - startNum].key;
+
+		newGroupPathkeys = lappend(newGroupPathkeys, key);
+
+		/*
+		 * Order GROUP BY clauses the same as path keys
+		 * XXX what to do if t's not found?
+		 */
+		foreach(gcell, *groupClauses)
+		{
+			SortGroupClause *sgc = (SortGroupClause*) lfirst(gcell);
+
+			if (key->pk_eclass->ec_sortref == sgc->tleSortGroupRef)
+				newGroupClauses = lappend(newGroupClauses, sgc);
+		}
+
+		i++;
+	}
+
+	pfree(keys);
+
+	/* Just append the rest GROUP BY clauses */
+	newGroupClauses = list_concat_unique_ptr(newGroupClauses, *groupClauses);
+
+	*groupPathkeys = newGroupPathkeys;
+	*groupClauses = newGroupClauses;
+}
+
 /*
  * get_cheapest_path_for_pathkeys
  *	  Find the cheapest path (according to the specified criterion) that
@@ -1611,6 +1777,39 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
 	return 0;					/* path ordering not useful */
 }
 
+/*
+ * pathkeys_useful_for_grouping
+ *		Count the number of pathkeys that are useful for grouping (instead of
+ *		explicit sort)
+ *
+ * Group pathkeys could be reordered, so we don't bother about actual order in
+ * pathkeys
+ */
+static int
+pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys)
+{
+	ListCell *key;
+	int		  n = 0;
+
+	if (root->group_pathkeys == NIL)
+		return 0;				/* no special ordering requested */
+
+	if (pathkeys == NIL)
+		return 0;				/* unordered path */
+
+	foreach(key, pathkeys)
+	{
+		PathKey	*pathkey = (PathKey *) lfirst(key);
+
+		if (!list_member_ptr(root->group_pathkeys, pathkey))
+			break;
+
+		n++;
+	}
+
+	return n;
+}
+
 /*
  * truncate_useless_pathkeys
  *		Shorten the given pathkey list to just the useful pathkeys.
@@ -1625,6 +1824,9 @@ truncate_useless_pathkeys(PlannerInfo *root,
 
 	nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
 	nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
+	if (nuseful2 > nuseful)
+		nuseful = nuseful2;
+	nuseful2 = pathkeys_useful_for_grouping(root, pathkeys);
 	if (nuseful2 > nuseful)
 		nuseful = nuseful2;
 
@@ -1660,6 +1862,8 @@ has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel)
 {
 	if (rel->joininfo != NIL || rel->has_eclass_joins)
 		return true;			/* might be able to use pathkeys for merging */
+	if (root->group_pathkeys != NIL)
+		return true;			/* might be able to use pathkeys for grouping */ 
 	if (root->query_pathkeys != NIL)
 		return true;			/* might be able to use them for ordering */
 	return false;				/* definitely useless */
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 67a2c7a581..16d607348a 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -6182,18 +6182,42 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 		{
 			Path	   *path = (Path *) lfirst(lc);
 			bool		is_sorted;
+			List	   *group_pathkeys = root->group_pathkeys,
+					   *group_clauses = parse->groupClause;
+			int			n_preordered_groups;
+
+			if (parse->groupingSets)
+			{
+				is_sorted = pathkeys_contained_in(group_pathkeys,
+												  path->pathkeys);
+			}
+			else
+			{
+				n_preordered_groups =
+						group_keys_reorder_by_pathkeys(path->pathkeys,
+													   &group_pathkeys,
+													   &group_clauses);
+				is_sorted = (n_preordered_groups == list_length(group_pathkeys));
+			}
 
-			is_sorted = pathkeys_contained_in(root->group_pathkeys,
-											  path->pathkeys);
 			if (path == cheapest_path || is_sorted)
 			{
 				/* Sort the cheapest-total path if it isn't already sorted */
 				if (!is_sorted)
+				{
+					if (!parse->groupingSets)
+						sort_group_key_by_distinct(root,
+												   path->rows,
+												   extra->targetList,
+												   &group_pathkeys,
+												   &group_clauses,
+												   n_preordered_groups);
 					path = (Path *) create_sort_path(root,
 													 grouped_rel,
 													 path,
-													 root->group_pathkeys,
+													 group_pathkeys,
 													 -1.0);
+				}
 
 				/* Now decide what to stick atop it */
 				if (parse->groupingSets)
@@ -6213,9 +6237,9 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 											 grouped_rel,
 											 path,
 											 grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+											 group_clauses ? AGG_SORTED : AGG_PLAIN,
 											 AGGSPLIT_SIMPLE,
-											 parse->groupClause,
+											 group_clauses,
 											 havingQual,
 											 agg_costs,
 											 dNumGroups));
@@ -6230,7 +6254,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 							 create_group_path(root,
 											   grouped_rel,
 											   path,
-											   parse->groupClause,
+											   group_clauses,
 											   havingQual,
 											   dNumGroups));
 				}
@@ -6251,19 +6275,34 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 			foreach(lc, partially_grouped_rel->pathlist)
 			{
 				Path	   *path = (Path *) lfirst(lc);
+				List	   *group_pathkeys = root->group_pathkeys,
+						   *group_clauses = parse->groupClause;
+				bool		is_sorted;
+				int			n_preordered_groups;
+
+				n_preordered_groups = group_keys_reorder_by_pathkeys(path->pathkeys,
+																	 &group_pathkeys,
+																	 &group_clauses);
+				is_sorted = (n_preordered_groups == list_length(group_pathkeys));
 
 				/*
 				 * Insert a Sort node, if required.  But there's no point in
 				 * sorting anything but the cheapest path.
 				 */
-				if (!pathkeys_contained_in(root->group_pathkeys, path->pathkeys))
+				if (!is_sorted)
 				{
 					if (path != partially_grouped_rel->cheapest_total_path)
 						continue;
+					sort_group_key_by_distinct(root,
+											   path->rows,
+											   extra->targetList,
+											   &group_pathkeys,
+											   &group_clauses,
+											   n_preordered_groups);
 					path = (Path *) create_sort_path(root,
 													 grouped_rel,
 													 path,
-													 root->group_pathkeys,
+													 group_pathkeys,
 													 -1.0);
 				}
 
@@ -6273,9 +6312,9 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 											 grouped_rel,
 											 path,
 											 grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+											 group_clauses ? AGG_SORTED : AGG_PLAIN,
 											 AGGSPLIT_FINAL_DESERIAL,
-											 parse->groupClause,
+											 group_clauses,
 											 havingQual,
 											 agg_final_costs,
 											 dNumGroups));
@@ -6284,7 +6323,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 							 create_group_path(root,
 											   grouped_rel,
 											   path,
-											   parse->groupClause,
+											   group_clauses,
 											   havingQual,
 											   dNumGroups));
 			}
@@ -6521,18 +6560,32 @@ create_partial_grouping_paths(PlannerInfo *root,
 		{
 			Path	   *path = (Path *) lfirst(lc);
 			bool		is_sorted;
+			List	   *group_pathkeys = root->group_pathkeys,
+					   *group_clauses = parse->groupClause;
+			int			n_preordered_groups;
+
+			n_preordered_groups = group_keys_reorder_by_pathkeys(path->pathkeys,
+																 &group_pathkeys,
+																 &group_clauses);
+			is_sorted = (n_preordered_groups == list_length(group_pathkeys));
 
-			is_sorted = pathkeys_contained_in(root->group_pathkeys,
-											  path->pathkeys);
 			if (path == cheapest_total_path || is_sorted)
 			{
 				/* Sort the cheapest partial path, if it isn't already */
 				if (!is_sorted)
+				{
+					sort_group_key_by_distinct(root,
+											   path->rows,
+											   extra->targetList,
+											   &group_pathkeys,
+											   &group_clauses,
+											   n_preordered_groups);
 					path = (Path *) create_sort_path(root,
 													 partially_grouped_rel,
 													 path,
-													 root->group_pathkeys,
+													 group_pathkeys,
 													 -1.0);
+				}
 
 				if (parse->hasAggs)
 					add_path(partially_grouped_rel, (Path *)
@@ -6540,9 +6593,9 @@ create_partial_grouping_paths(PlannerInfo *root,
 											 partially_grouped_rel,
 											 path,
 											 partially_grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+											 group_clauses ? AGG_SORTED : AGG_PLAIN,
 											 AGGSPLIT_INITIAL_SERIAL,
-											 parse->groupClause,
+											 group_clauses,
 											 NIL,
 											 agg_partial_costs,
 											 dNumPartialGroups));
@@ -6551,7 +6604,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 							 create_group_path(root,
 											   partially_grouped_rel,
 											   path,
-											   parse->groupClause,
+											   group_clauses,
 											   NIL,
 											   dNumPartialGroups));
 			}
@@ -6565,18 +6618,33 @@ create_partial_grouping_paths(PlannerInfo *root,
 		{
 			Path	   *path = (Path *) lfirst(lc);
 			bool		is_sorted;
+			List	   *group_pathkeys = root->group_pathkeys,
+					   *group_clauses = parse->groupClause;
+			int			n_preordered_groups;
+
+			n_preordered_groups = group_keys_reorder_by_pathkeys(path->pathkeys,
+																 &group_pathkeys,
+																 &group_clauses);
+			is_sorted = (n_preordered_groups == list_length(group_pathkeys));
 
-			is_sorted = pathkeys_contained_in(root->group_pathkeys,
-											  path->pathkeys);
 			if (path == cheapest_partial_path || is_sorted)
 			{
+
 				/* Sort the cheapest partial path, if it isn't already */
 				if (!is_sorted)
+				{
+					sort_group_key_by_distinct(root,
+											   path->rows,
+											   extra->targetList,
+											   &group_pathkeys,
+											   &group_clauses,
+											   n_preordered_groups);
 					path = (Path *) create_sort_path(root,
 													 partially_grouped_rel,
 													 path,
-													 root->group_pathkeys,
+													 group_pathkeys,
 													 -1.0);
+				}
 
 				if (parse->hasAggs)
 					add_partial_path(partially_grouped_rel, (Path *)
@@ -6584,9 +6652,9 @@ create_partial_grouping_paths(PlannerInfo *root,
 													 partially_grouped_rel,
 													 path,
 													 partially_grouped_rel->reltarget,
-													 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+													 group_clauses ? AGG_SORTED : AGG_PLAIN,
 													 AGGSPLIT_INITIAL_SERIAL,
-													 parse->groupClause,
+													 group_clauses,
 													 NIL,
 													 agg_partial_costs,
 													 dNumPartialPartialGroups));
@@ -6595,7 +6663,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 									 create_group_path(root,
 													   partially_grouped_rel,
 													   path,
-													   parse->groupClause,
+													   group_clauses,
 													   NIL,
 													   dNumPartialPartialGroups));
 			}
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index cafde307ad..fb211cc32f 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -190,6 +190,15 @@ typedef enum
 
 extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2);
 extern bool pathkeys_contained_in(List *keys1, List *keys2);
+extern int group_keys_reorder_by_pathkeys(List *pathkeys,
+										  List **group_pathkeys,
+										  List **group_clauses);
+extern void sort_group_key_by_distinct(PlannerInfo *root,
+									   double nrows,
+									   List *targetList,
+									   List **groupPathkeys,
+									   List **groupClauses,
+									   int	startNum);
 extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
 							   Relids required_outer,
 							   CostSelector cost_criterion,
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index f85e913850..081ac4ac28 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -2065,3 +2065,152 @@ SELECT balk(hundred) FROM tenk1;
 (1 row)
 
 ROLLBACK;
+-- GROUP BY optimization by reorder columns by uniqueness
+SELECT
+	i AS id,
+	i/2 AS p,
+	format('%60s', i%2) AS v,
+	i/4 AS c,
+	i/8 AS d
+	INTO btg
+FROM
+	generate_series(1, 10000) i;
+VACUUM btg;
+ANALYZE btg;
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Sort
+         Sort Key: p, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Sort
+         Sort Key: p, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, c, v
+   ->  Sort
+         Sort Key: p, c, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: p, c, d, v
+   ->  Sort
+         Sort Key: p, c, d, v
+         ->  Seq Scan on btg
+(5 rows)
+
+-- GROUP BY optimization by reorder columns by index scan
+CREATE INDEX ON btg(p, v);
+SET enable_seqscan=off;
+SET enable_bitmapscan=off;
+VACUUM btg;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c
+   ->  Sort
+         Sort Key: p, v, c
+         ->  Index Scan using btg_p_v_idx on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c
+   ->  Sort
+         Sort Key: p, v, c
+         ->  Index Scan using btg_p_v_idx on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c, d
+   ->  Sort
+         Sort Key: p, v, c, d
+         ->  Index Scan using btg_p_v_idx on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c, d
+   ->  Sort
+         Sort Key: p, v, c, d
+         ->  Index Scan using btg_p_v_idx on btg
+(5 rows)
+
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
+RESET enable_seqscan;
+RESET enable_bitmapscan;
diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out
index b983f9c506..3915a837f0 100644
--- a/src/test/regress/expected/partition_join.out
+++ b/src/test/regress/expected/partition_join.out
@@ -1140,7 +1140,7 @@ SELECT avg(t1.a), avg(t2.b), avg(t3.a + t3.b), t1.c, t2.c, t3.c FROM plt1 t1, pl
                                    QUERY PLAN                                   
 --------------------------------------------------------------------------------
  GroupAggregate
-   Group Key: t1.c, t2.c, t3.c
+   Group Key: t1.c, t3.c, t2.c
    ->  Sort
          Sort Key: t1.c, t3.c
          ->  Append
@@ -1284,7 +1284,7 @@ SELECT avg(t1.a), avg(t2.b), avg(t3.a + t3.b), t1.c, t2.c, t3.c FROM pht1 t1, ph
                                    QUERY PLAN                                   
 --------------------------------------------------------------------------------
  GroupAggregate
-   Group Key: t1.c, t2.c, t3.c
+   Group Key: t1.c, t3.c, t2.c
    ->  Sort
          Sort Key: t1.c, t3.c
          ->  Append
diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out
index 054a381dad..adecec8872 100644
--- a/src/test/regress/expected/stats_ext.out
+++ b/src/test/regress/expected/stats_ext.out
@@ -281,9 +281,9 @@ EXPLAIN (COSTS off)
             QUERY PLAN             
 -----------------------------------
  GroupAggregate
-   Group Key: a, b
+   Group Key: b, a
    ->  Sort
-         Sort Key: a, b
+         Sort Key: b, a
          ->  Seq Scan on ndistinct
 (5 rows)
 
@@ -292,9 +292,9 @@ EXPLAIN (COSTS off)
             QUERY PLAN             
 -----------------------------------
  GroupAggregate
-   Group Key: a, b, c
+   Group Key: b, a, c
    ->  Sort
-         Sort Key: a, b, c
+         Sort Key: b, a, c
          ->  Seq Scan on ndistinct
 (5 rows)
 
@@ -303,9 +303,9 @@ EXPLAIN (COSTS off)
             QUERY PLAN             
 -----------------------------------
  GroupAggregate
-   Group Key: a, b, c, d
+   Group Key: d, b, a, c
    ->  Sort
-         Sort Key: a, b, c, d
+         Sort Key: d, b, a, c
          ->  Seq Scan on ndistinct
 (5 rows)
 
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 506d0442d7..b6b2c5c26e 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -907,3 +907,72 @@ EXPLAIN (COSTS OFF) SELECT balk(hundred) FROM tenk1;
 SELECT balk(hundred) FROM tenk1;
 
 ROLLBACK;
+
+-- GROUP BY optimization by reorder columns by uniqueness
+
+SELECT
+	i AS id,
+	i/2 AS p,
+	format('%60s', i%2) AS v,
+	i/4 AS c,
+	i/8 AS d
+	INTO btg
+FROM
+	generate_series(1, 10000) i;
+
+VACUUM btg;
+ANALYZE btg;
+
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c;
+
+-- GROUP BY optimization by reorder columns by index scan
+
+CREATE INDEX ON btg(p, v);
+SET enable_seqscan=off;
+SET enable_bitmapscan=off;
+VACUUM btg;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v;
+
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
+RESET enable_seqscan;
+RESET enable_bitmapscan;
+

Re: POC: GROUP BY optimization

Reply via email to