Re: Remove useless GROUP BY columns considering unique index

jian he Thu, 28 Nov 2024 23:05:05 -0800

On Fri, Nov 29, 2024 at 2:36 PM Andrei Lepikhov <[email protected]> wrote:
>
> > I forgot to do a local commit before sending v8. Fixed in the attached v9.
> 1. Thread reference in the patch comment doesn't work.
fixed.


I found that unique expression index can also be used for groupby
column removal.
so I implemented it, aslo added two tests on it.

now remove_useless_groupby_columns like:
  if (index->indexprs == NIL)
  {
    --handle unique index
  }
  else
  {
    --handle unique expression index
  }

what do you think?

From ca9302a8c98f2ad4a8f9837ec42ec2da6588e25d Mon Sep 17 00:00:00 2001
From: jian he <[email protected]>
Date: Fri, 29 Nov 2024 14:54:23 +0800
Subject: [PATCH v10 1/1] remove useless group by columns via unique-not-null

In the `remove_useless_groupby_columns` function,
use a unique index and a not-null constraint to eliminate unnecessary columns.
Here, the primary key is treated as a "unique index and not-null".
If there are multiple candidates,
use the unique index with the fewest columns to remove the other columns
in groupby expression.

discussion: https://postgr.es/m/327990c8-b9b2-4b0c-bffb-462249f82de0@Spark
---
 src/backend/optimizer/plan/initsplan.c   | 258 ++++++++++++++++++++++-
 src/backend/optimizer/plan/planmain.c    |   3 +
 src/backend/optimizer/plan/planner.c     | 163 --------------
 src/backend/optimizer/util/plancat.c     |   1 +
 src/include/nodes/pathnodes.h            |   2 +
 src/include/optimizer/planmain.h         |   1 +
 src/test/regress/expected/aggregates.out |  79 +++++++
 src/test/regress/sql/aggregates.sql      |  39 ++++
 8 files changed, 382 insertions(+), 164 deletions(-)

diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c
index 903c397d40..bd82745f1e 100644
--- a/src/backend/optimizer/plan/initsplan.c
+++ b/src/backend/optimizer/plan/initsplan.c
@@ -1,7 +1,7 @@
 /*-------------------------------------------------------------------------
  *
  * initsplan.c
- *	  Target list, qualification, joininfo initialization routines
+ *	  Target list, group by, qualification, joininfo initialization routines
  *
  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
@@ -387,6 +387,262 @@ add_vars_to_attr_needed(PlannerInfo *root, List *vars,
 }
 
 
+/*****************************************************************************
+ *
+ *	  GROUP BY
+ *
+ *****************************************************************************/
+
+/*
+ * remove_useless_groupby_columns
+ *		Remove any columns in the GROUP BY clause that are redundant due to
+ *		being functionally dependent on other GROUP BY columns.
+ *
+ * Since some other DBMSes do not allow references to ungrouped columns, it's
+ * not unusual to find all columns listed in GROUP BY even though listing the
+ * primary-key columns, or columns of a unique constraint would be sufficient.
+ * Deleting such excess columns avoids redundant sorting or hashing work, so
+ * it's worth doing.
+ *
+ * Relcache invalidations will ensure that cached plans become invalidated
+ * when the underlying supporting indexes are dropped or if a column's NOT
+ * NULL attribute is removed.
+ */
+void
+remove_useless_groupby_columns(PlannerInfo *root)
+{
+	Query	   *parse = root->parse;
+	Bitmapset **groupbyattnos;
+	Bitmapset **surplusvars;
+	ListCell   *lc;
+	int			relid;
+
+	/* No chance to do anything if there are less than two GROUP BY items */
+	if (list_length(root->processed_groupClause) < 2)
+		return;
+
+	/* Don't fiddle with the GROUP BY clause if the query has grouping sets */
+	if (parse->groupingSets)
+		return;
+
+	/*
+	 * Scan the GROUP BY clause to find GROUP BY items that are simple Vars.
+	 * Fill groupbyattnos[k] with a bitmapset of the column attnos of RTE k
+	 * that are GROUP BY items.
+	 */
+	groupbyattnos = (Bitmapset **) palloc0(sizeof(Bitmapset *) *
+										   (list_length(parse->rtable) + 1));
+	foreach(lc, root->processed_groupClause)
+	{
+		SortGroupClause *sgc = lfirst_node(SortGroupClause, lc);
+		TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
+		Var		   *var = (Var *) tle->expr;
+
+		/*
+		 * Ignore non-Vars and Vars from other query levels.
+		 *
+		 * XXX in principle, stable expressions containing Vars could also be
+		 * removed, if all the Vars are functionally dependent on other GROUP
+		 * BY items.  But it's not clear that such cases occur often enough to
+		 * be worth troubling over.
+		 */
+		if (!IsA(var, Var) ||
+			var->varlevelsup > 0)
+			continue;
+
+		/* OK, remember we have this Var */
+		relid = var->varno;
+		Assert(relid <= list_length(parse->rtable));
+		groupbyattnos[relid] = bms_add_member(groupbyattnos[relid],
+											  var->varattno - FirstLowInvalidHeapAttributeNumber);
+	}
+
+	/*
+	 * Consider each relation and see if it is possible to remove some of its
+	 * Vars from GROUP BY.  For simplicity and speed, we do the actual removal
+	 * in a separate pass.  Here, we just fill surplusvars[k] with a bitmapset
+	 * of the column attnos of RTE k that are removable GROUP BY items.
+	 */
+	surplusvars = NULL;			/* don't allocate array unless required */
+	relid = 0;
+	foreach(lc, parse->rtable)
+	{
+		RangeTblEntry *rte = lfirst_node(RangeTblEntry, lc);
+		RelOptInfo *rel;
+		Bitmapset  *relattnos;
+		Bitmapset  *best_keycolumns = NULL;
+		int32		best_nkeycolumns = PG_INT32_MAX;
+
+		relid++;
+
+		/* Only plain relations could have primary-key constraints */
+		if (rte->rtekind != RTE_RELATION)
+			continue;
+
+		/*
+		 * We must skip inheritance parent tables as some of the child rels
+		 * may cause duplicate rows.  This cannot happen with partitioned
+		 * tables, however.
+		 */
+		if (rte->inh && rte->relkind != RELKIND_PARTITIONED_TABLE)
+			continue;
+
+		/* Nothing to do unless this rel has multiple Vars in GROUP BY */
+		relattnos = groupbyattnos[relid];
+		if (bms_membership(relattnos) != BMS_MULTIPLE)
+			continue;
+
+		rel = root->simple_rel_array[relid];
+
+		/*
+		 * Now search each index to check if there are any indexes where the
+		 * indexed columns are a subset of the GROUP BY columns for this
+		 * relation.
+		 */
+		foreach_node(IndexOptInfo, index, rel->indexlist)
+		{
+			Bitmapset  *ind_attnos;
+			bool		nulls_check_ok;
+
+			/*
+			 * Skip any non-unique and deferrable indexes.  Predicate indexes
+			 * have not been checked yet, so we must skip those too as the
+			 * predOK check might fail.
+			 */
+			if (!index->unique || !index->immediate || index->indpred != NIL)
+				continue;
+
+			ind_attnos = NULL;
+			nulls_check_ok = true;
+			if (index->indexprs == NIL)
+			{
+				for (int i = 0; i < index->nkeycolumns; i++)
+				{
+					/*
+					* We must insist that the index columns are all defined NOT
+					* NULL otherwise duplicate NULLs could exists.  However, we
+					* can relax this check when the index is defined with NULLS
+					* NOT DISTINCT as there can only be 1 NULL row, therefore
+					* functional dependency on the unique columns is maintained,
+					* despite the NULL.
+					*/
+					if (!index->nullsnotdistinct &&
+						!bms_is_member(index->indexkeys[i],
+									rel->notnullattnums))
+					{
+						nulls_check_ok = false;
+						break;
+					}
+
+					ind_attnos =
+						bms_add_member(ind_attnos,
+									index->indexkeys[i] -
+									FirstLowInvalidHeapAttributeNumber);
+				}
+			}
+			else
+			{
+				int			attrnos = -1;
+
+				Bitmapset  *expr_attrs = NULL;
+				ListCell   *indexpr_item = NULL;
+
+				indexpr_item = list_head(index->indexprs);
+				pull_varattnos((Node *) lfirst(indexpr_item), 1, &expr_attrs);
+
+				/* 
+				 * pull_varattnos returned varattnos is offset by
+				 * FirstLowInvalidHeapAttributeNumber. we need add
+				 * FirstLowInvalidHeapAttributeNumber for comparison with
+				 * rel->notnullattnums
+				*/
+				while ((attrnos = bms_next_member(expr_attrs, attrnos)) >= 0)
+				{
+					if (!index->nullsnotdistinct &&
+						!bms_is_member(attrnos + FirstLowInvalidHeapAttributeNumber, rel->notnullattnums))
+					{
+						nulls_check_ok = false;
+						break;
+					}
+				}
+
+				ind_attnos = expr_attrs;
+			}
+
+			if (!nulls_check_ok)
+				continue;
+
+			/*
+			 * Skip any indexes where the indexed columns aren't a subset of
+			 * the GROUP BY.
+			 */
+			if (bms_subset_compare(ind_attnos, relattnos) != BMS_SUBSET1)
+				continue;
+
+			/*
+			 * Record the attribute numbers from the index with the fewest
+			 * columns.  This allows the largest number of columns to be
+			 * removed from the GROUP BY clause.  In the future, we may wish
+			 * to consider using the narrowest set of columns and looking at
+			 * pg_statistic.stawidth as it might be better to use an index
+			 * with, say two INT4s, rather than, say, one long varlena column.
+			 */
+			if (index->nkeycolumns < best_nkeycolumns)
+			{
+				best_keycolumns = ind_attnos;
+				best_nkeycolumns = index->nkeycolumns;
+			}
+		}
+
+		/* Did we find a suitable index? */
+		if (best_keycolumns != NULL)
+		{
+			/*
+			 * To easily remember whether we've found anything to do, we don't
+			 * allocate the surplusvars[] array until we find something.
+			 */
+			if (surplusvars == NULL)
+				surplusvars =
+					(Bitmapset **) palloc0(sizeof(Bitmapset *) *
+										   (list_length(parse->rtable) +
+											1));
+
+			/* Remember the attnos of the removable columns */
+			surplusvars[relid] = bms_difference(relattnos, best_keycolumns);
+		}
+	}
+
+	/*
+	 * If we found any surplus Vars, build a new GROUP BY clause without them.
+	 * (Note: this may leave some TLEs with unreferenced ressortgroupref
+	 * markings, but that's harmless.)
+	 */
+	if (surplusvars != NULL)
+	{
+		List	   *new_groupby = NIL;
+
+		foreach(lc, root->processed_groupClause)
+		{
+			SortGroupClause *sgc = lfirst_node(SortGroupClause, lc);
+			TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
+			Var		   *var = (Var *) tle->expr;
+
+			/*
+			 * New list must include non-Vars, outer Vars, and anything not
+			 * marked as surplus.
+			 */
+			if (!IsA(var, Var) || var->varlevelsup > 0 ||
+				!bms_is_member(var->varattno -
+							   FirstLowInvalidHeapAttributeNumber,
+							   surplusvars[var->varno]))
+				new_groupby = lappend(new_groupby, sgc);
+		}
+
+		root->processed_groupClause = new_groupby;
+	}
+}
+
+
 /*****************************************************************************
  *
  *	  LATERAL REFERENCES
diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c
index e17d31a5c3..735560e8ca 100644
--- a/src/backend/optimizer/plan/planmain.c
+++ b/src/backend/optimizer/plan/planmain.c
@@ -169,6 +169,9 @@ query_planner(PlannerInfo *root,
 	 */
 	add_base_rels_to_query(root, (Node *) parse->jointree);
 
+	/* Remove any redundant GROUP BY columns */
+	remove_useless_groupby_columns(root);
+
 	/*
 	 * Examine the targetlist and join tree, adding entries to baserel
 	 * targetlists for all referenced Vars, and generating PlaceHolderInfo
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index b665a7762e..352c4edf3a 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -139,7 +139,6 @@ static void preprocess_rowmarks(PlannerInfo *root);
 static double preprocess_limit(PlannerInfo *root,
 							   double tuple_fraction,
 							   int64 *offset_est, int64 *count_est);
-static void remove_useless_groupby_columns(PlannerInfo *root);
 static List *preprocess_groupclause(PlannerInfo *root, List *force);
 static List *extract_rollup_sets(List *groupingSets);
 static List *reorder_grouping_sets(List *groupingSets, List *sortclause);
@@ -1487,8 +1486,6 @@ grouping_planner(PlannerInfo *root, double tuple_fraction,
 		{
 			/* Preprocess regular GROUP BY clause, if any */
 			root->processed_groupClause = preprocess_groupclause(root, NIL);
-			/* Remove any redundant GROUP BY columns */
-			remove_useless_groupby_columns(root);
 		}
 
 		/*
@@ -2724,166 +2721,6 @@ limit_needed(Query *parse)
 	return false;				/* don't need a Limit plan node */
 }
 
-
-/*
- * remove_useless_groupby_columns
- *		Remove any columns in the GROUP BY clause that are redundant due to
- *		being functionally dependent on other GROUP BY columns.
- *
- * Since some other DBMSes do not allow references to ungrouped columns, it's
- * not unusual to find all columns listed in GROUP BY even though listing the
- * primary-key columns would be sufficient.  Deleting such excess columns
- * avoids redundant sorting work, so it's worth doing.
- *
- * Relcache invalidations will ensure that cached plans become invalidated
- * when the underlying index of the pkey constraint is dropped.
- *
- * Currently, we only make use of pkey constraints for this, however, we may
- * wish to take this further in the future and also use unique constraints
- * which have NOT NULL columns.  In that case, plan invalidation will still
- * work since relations will receive a relcache invalidation when a NOT NULL
- * constraint is dropped.
- */
-static void
-remove_useless_groupby_columns(PlannerInfo *root)
-{
-	Query	   *parse = root->parse;
-	Bitmapset **groupbyattnos;
-	Bitmapset **surplusvars;
-	ListCell   *lc;
-	int			relid;
-
-	/* No chance to do anything if there are less than two GROUP BY items */
-	if (list_length(root->processed_groupClause) < 2)
-		return;
-
-	/* Don't fiddle with the GROUP BY clause if the query has grouping sets */
-	if (parse->groupingSets)
-		return;
-
-	/*
-	 * Scan the GROUP BY clause to find GROUP BY items that are simple Vars.
-	 * Fill groupbyattnos[k] with a bitmapset of the column attnos of RTE k
-	 * that are GROUP BY items.
-	 */
-	groupbyattnos = (Bitmapset **) palloc0(sizeof(Bitmapset *) *
-										   (list_length(parse->rtable) + 1));
-	foreach(lc, root->processed_groupClause)
-	{
-		SortGroupClause *sgc = lfirst_node(SortGroupClause, lc);
-		TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
-		Var		   *var = (Var *) tle->expr;
-
-		/*
-		 * Ignore non-Vars and Vars from other query levels.
-		 *
-		 * XXX in principle, stable expressions containing Vars could also be
-		 * removed, if all the Vars are functionally dependent on other GROUP
-		 * BY items.  But it's not clear that such cases occur often enough to
-		 * be worth troubling over.
-		 */
-		if (!IsA(var, Var) ||
-			var->varlevelsup > 0)
-			continue;
-
-		/* OK, remember we have this Var */
-		relid = var->varno;
-		Assert(relid <= list_length(parse->rtable));
-		groupbyattnos[relid] = bms_add_member(groupbyattnos[relid],
-											  var->varattno - FirstLowInvalidHeapAttributeNumber);
-	}
-
-	/*
-	 * Consider each relation and see if it is possible to remove some of its
-	 * Vars from GROUP BY.  For simplicity and speed, we do the actual removal
-	 * in a separate pass.  Here, we just fill surplusvars[k] with a bitmapset
-	 * of the column attnos of RTE k that are removable GROUP BY items.
-	 */
-	surplusvars = NULL;			/* don't allocate array unless required */
-	relid = 0;
-	foreach(lc, parse->rtable)
-	{
-		RangeTblEntry *rte = lfirst_node(RangeTblEntry, lc);
-		Bitmapset  *relattnos;
-		Bitmapset  *pkattnos;
-		Oid			constraintOid;
-
-		relid++;
-
-		/* Only plain relations could have primary-key constraints */
-		if (rte->rtekind != RTE_RELATION)
-			continue;
-
-		/*
-		 * We must skip inheritance parent tables as some of the child rels
-		 * may cause duplicate rows.  This cannot happen with partitioned
-		 * tables, however.
-		 */
-		if (rte->inh && rte->relkind != RELKIND_PARTITIONED_TABLE)
-			continue;
-
-		/* Nothing to do unless this rel has multiple Vars in GROUP BY */
-		relattnos = groupbyattnos[relid];
-		if (bms_membership(relattnos) != BMS_MULTIPLE)
-			continue;
-
-		/*
-		 * Can't remove any columns for this rel if there is no suitable
-		 * (i.e., nondeferrable) primary key constraint.
-		 */
-		pkattnos = get_primary_key_attnos(rte->relid, false, &constraintOid);
-		if (pkattnos == NULL)
-			continue;
-
-		/*
-		 * If the primary key is a proper subset of relattnos then we have
-		 * some items in the GROUP BY that can be removed.
-		 */
-		if (bms_subset_compare(pkattnos, relattnos) == BMS_SUBSET1)
-		{
-			/*
-			 * To easily remember whether we've found anything to do, we don't
-			 * allocate the surplusvars[] array until we find something.
-			 */
-			if (surplusvars == NULL)
-				surplusvars = (Bitmapset **) palloc0(sizeof(Bitmapset *) *
-													 (list_length(parse->rtable) + 1));
-
-			/* Remember the attnos of the removable columns */
-			surplusvars[relid] = bms_difference(relattnos, pkattnos);
-		}
-	}
-
-	/*
-	 * If we found any surplus Vars, build a new GROUP BY clause without them.
-	 * (Note: this may leave some TLEs with unreferenced ressortgroupref
-	 * markings, but that's harmless.)
-	 */
-	if (surplusvars != NULL)
-	{
-		List	   *new_groupby = NIL;
-
-		foreach(lc, root->processed_groupClause)
-		{
-			SortGroupClause *sgc = lfirst_node(SortGroupClause, lc);
-			TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
-			Var		   *var = (Var *) tle->expr;
-
-			/*
-			 * New list must include non-Vars, outer Vars, and anything not
-			 * marked as surplus.
-			 */
-			if (!IsA(var, Var) ||
-				var->varlevelsup > 0 ||
-				!bms_is_member(var->varattno - FirstLowInvalidHeapAttributeNumber,
-							   surplusvars[var->varno]))
-				new_groupby = lappend(new_groupby, sgc);
-		}
-
-		root->processed_groupClause = new_groupby;
-	}
-}
-
 /*
  * preprocess_groupclause - do preparatory work on GROUP BY clause
  *
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 37b0ca2e43..153390f2dc 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -457,6 +457,7 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
 			info->indrestrictinfo = NIL;	/* set later, in indxpath.c */
 			info->predOK = false;	/* set later, in indxpath.c */
 			info->unique = index->indisunique;
+			info->nullsnotdistinct = index->indnullsnotdistinct;
 			info->immediate = index->indimmediate;
 			info->hypothetical = false;
 
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index add0f9e45f..0759e00e96 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -1187,6 +1187,8 @@ struct IndexOptInfo
 	bool		predOK;
 	/* true if a unique index */
 	bool		unique;
+	/* true if the index was defined with NULLS NOT DISTINCT */
+	bool		nullsnotdistinct;
 	/* is uniqueness enforced immediately? */
 	bool		immediate;
 	/* true if index doesn't really exist */
diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h
index 93137261e4..0b6f0f7969 100644
--- a/src/include/optimizer/planmain.h
+++ b/src/include/optimizer/planmain.h
@@ -74,6 +74,7 @@ extern void add_vars_to_targetlist(PlannerInfo *root, List *vars,
 								   Relids where_needed);
 extern void add_vars_to_attr_needed(PlannerInfo *root, List *vars,
 									Relids where_needed);
+extern void remove_useless_groupby_columns(PlannerInfo *root);
 extern void find_lateral_references(PlannerInfo *root);
 extern void rebuild_lateral_attr_needed(PlannerInfo *root);
 extern void create_lateral_join_info(PlannerInfo *root);
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 1e682565d1..23890c6e1f 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1448,6 +1448,85 @@ explain (costs off) select * from p_t1 group by a,b,c,d;
          ->  Seq Scan on p_t1_2
 (5 rows)
 
+create unique index t3_c_uidx on t3(c);
+-- Ensure we don't remove any columns from the GROUP BY for a unique
+-- index on a NULLable column.
+explain (costs off) select b,c from t3 group by b,c;
+      QUERY PLAN      
+----------------------
+ HashAggregate
+   Group Key: b, c
+   ->  Seq Scan on t3
+(3 rows)
+
+-- Make the column NOT NULL and ensure we remove the redundant column
+alter table t3 alter column c set not null;
+explain (costs off) select b,c from t3 group by b,c;
+      QUERY PLAN      
+----------------------
+ HashAggregate
+   Group Key: c
+   ->  Seq Scan on t3
+(3 rows)
+
+-- When there are multiple supporting unique indexes and the GROUP BY contains
+-- columns to cover all of those, ensure we pick the index with the least
+-- number of columns so that we can remove more columns from the GROUP BY.
+explain (costs off) select a,b,c from t3 group by a,b,c;
+      QUERY PLAN      
+----------------------
+ HashAggregate
+   Group Key: c
+   ->  Seq Scan on t3
+(3 rows)
+
+-- As above but try ordering the columns differently to ensure we get the
+-- same result.
+explain (costs off) select a,b,c from t3 group by c,a,b;
+      QUERY PLAN      
+----------------------
+ HashAggregate
+   Group Key: c
+   ->  Seq Scan on t3
+(3 rows)
+
+-- A unique index defined as NULLS NOT DISTINCT does not need a supporting NOT
+-- NULL constraint on the indexed columns.  Ensure the redundant columns are
+-- removed from the GROUP BY for such a table.
+drop index t3_c_uidx;
+alter table t3 alter column c drop not null;
+create unique index t3_c_uidx on t3(c) nulls not distinct;
+explain (costs off) select b,c from t3 group by b,c;
+      QUERY PLAN      
+----------------------
+ HashAggregate
+   Group Key: c
+   ->  Seq Scan on t3
+(3 rows)
+
+--test unique expression index can be used to remove redundant columns.
+drop index t3_c_uidx;
+create unique index t3_bc on t3((c+b));
+alter table t3 alter column c set not null;
+explain(costs off) select a,b,c from t3 group by a,b,c;
+      QUERY PLAN      
+----------------------
+ HashAggregate
+   Group Key: b, c
+   ->  Seq Scan on t3
+(3 rows)
+
+--test NULLS NOT DISTINCT with unique expression index
+create unique index t3_bc_uidx on t3((c+b)) nulls not distinct;
+alter table t3 alter column c drop not null;
+explain(costs off) select a,b,c from t3 group by a,b,c;
+      QUERY PLAN      
+----------------------
+ HashAggregate
+   Group Key: b, c
+   ->  Seq Scan on t3
+(3 rows)
+
 drop table t1 cascade;
 NOTICE:  drop cascades to table t1c
 drop table t2;
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 4885daffe6..b4d1b9b97a 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -507,6 +507,45 @@ create temp table p_t1_2 partition of p_t1 for values in(2);
 -- Ensure we can remove non-PK columns for partitioned tables.
 explain (costs off) select * from p_t1 group by a,b,c,d;
 
+create unique index t3_c_uidx on t3(c);
+
+-- Ensure we don't remove any columns from the GROUP BY for a unique
+-- index on a NULLable column.
+explain (costs off) select b,c from t3 group by b,c;
+
+-- Make the column NOT NULL and ensure we remove the redundant column
+alter table t3 alter column c set not null;
+explain (costs off) select b,c from t3 group by b,c;
+
+-- When there are multiple supporting unique indexes and the GROUP BY contains
+-- columns to cover all of those, ensure we pick the index with the least
+-- number of columns so that we can remove more columns from the GROUP BY.
+explain (costs off) select a,b,c from t3 group by a,b,c;
+
+-- As above but try ordering the columns differently to ensure we get the
+-- same result.
+explain (costs off) select a,b,c from t3 group by c,a,b;
+
+-- A unique index defined as NULLS NOT DISTINCT does not need a supporting NOT
+-- NULL constraint on the indexed columns.  Ensure the redundant columns are
+-- removed from the GROUP BY for such a table.
+drop index t3_c_uidx;
+alter table t3 alter column c drop not null;
+create unique index t3_c_uidx on t3(c) nulls not distinct;
+explain (costs off) select b,c from t3 group by b,c;
+
+
+--test unique expression index can be used to remove redundant columns.
+drop index t3_c_uidx;
+create unique index t3_bc on t3((c+b));
+alter table t3 alter column c set not null;
+explain(costs off) select a,b,c from t3 group by a,b,c;
+
+--test NULLS NOT DISTINCT with unique expression index
+create unique index t3_bc_uidx on t3((c+b)) nulls not distinct;
+alter table t3 alter column c drop not null;
+explain(costs off) select a,b,c from t3 group by a,b,c;
+
 drop table t1 cascade;
 drop table t2;
 drop table t3;
-- 
2.34.1

Re: Remove useless GROUP BY columns considering unique index

Reply via email to