Re: Expand applicability of aggregate's sortop optimization

Andrei Lepikhov Fri, 27 Dec 2024 20:24:39 -0800

On 12/11/24 07:22, Michael Paquier wrote:

On Fri, Nov 08, 2024 at 01:05:04PM +0700, Andrei Lepikhov wrote:

Rebase onto current master and slight improvement.


Your patch is failing in the CI, please rebase.  I have it to the next
CF for now, waiting on author.

Thanks,

I overlooked the fact that regression tests can be launched in parallel.Therefore, adding an index to the frequently used tenk1 table was a badidea.

In the attachment - the new patch rebased on the current master.

--
regards, Andrei Lepikhov

From 880ddf7170a0e01fb852172ba99a32e2494f50c6 Mon Sep 17 00:00:00 2001
From: "Andrei V. Lepikhov" <[email protected]>
Date: Fri, 8 Nov 2024 10:36:06 +0700
Subject: [PATCH v4] Add prosupport helpers to aggregate functions.

Following the spirit of ed1a88d, add a prosupport call for aggregates.
Here, we introduce a new support function node type to allow support functions
to be called for aggregate functions.
This code introduces only min/max trivial optimisation in the core. However, a
user can design alternative support functions on their own.
---
 src/backend/optimizer/plan/planagg.c     |  76 +++++++++++++
 src/backend/optimizer/prep/prepagg.c     |  26 +++++
 src/include/catalog/pg_proc.dat          |  92 +++++++--------
 src/include/nodes/supportnodes.h         |   7 ++
 src/test/regress/expected/aggregates.out | 135 ++++++++++++++++++++++-
 src/test/regress/sql/aggregates.sql      |  62 +++++++++++
 src/tools/pgindent/typedefs.list         |   1 +
 7 files changed, 352 insertions(+), 47 deletions(-)

diff --git a/src/backend/optimizer/plan/planagg.c b/src/backend/optimizer/plan/planagg.c
index dbcd3b9a0a..845b17f01b 100644
--- a/src/backend/optimizer/plan/planagg.c
+++ b/src/backend/optimizer/plan/planagg.c
@@ -33,6 +33,7 @@
 #include "catalog/pg_type.h"
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
+#include "nodes/supportnodes.h"
 #include "optimizer/cost.h"
 #include "optimizer/optimizer.h"
 #include "optimizer/pathnode.h"
@@ -43,6 +44,7 @@
 #include "parser/parse_clause.h"
 #include "parser/parsetree.h"
 #include "rewrite/rewriteManip.h"
+#include "utils/fmgrprotos.h"
 #include "utils/lsyscache.h"
 #include "utils/syscache.h"
 
@@ -512,3 +514,77 @@ fetch_agg_sort_op(Oid aggfnoid)
 
 	return aggsortop;
 }
+
+Datum
+minmax_support(PG_FUNCTION_ARGS)
+{
+	Node   *rawreq = (Node *) PG_GETARG_POINTER(0);
+	Oid		aggsortop;
+
+	if (IsA(rawreq, SupportRequestAggregate))
+	{
+		SupportRequestAggregate   *req = (SupportRequestAggregate *) rawreq;
+		Aggref				   *aggref = req->aggref;
+
+		if (list_length(aggref->args) != 1 || aggref->aggfilter != NULL)
+			PG_RETURN_POINTER(NULL);
+
+		aggsortop = fetch_agg_sort_op(aggref->aggfnoid);
+		if (!OidIsValid(aggsortop))
+			PG_RETURN_POINTER(NULL);		/* not a MIN/MAX aggregate */
+
+		if (aggref->aggorder != NIL)
+		{
+			SortGroupClause	   *orderClause;
+			TargetEntry		   *curTarget;
+
+			curTarget = (TargetEntry *) linitial(aggref->args);
+
+			/*
+			 * If the order clause is the same column as the one we're
+			 * aggregating, we can still use the index: It is undefined which
+			 * value is MIN() or MAX(), as well as which value is first or
+			 * last when sorted. So, we can still use the index IFF the
+			 * aggregated expression equals the expression used in the
+			 * ordering operation.
+			 */
+
+			/*
+			 * We only accept a single argument to min/max aggregates,
+			 * orderings that have more clauses won't provide correct results.
+			 */
+			Assert(list_length(aggref->aggorder) == 1);
+
+			orderClause = castNode(SortGroupClause, linitial(aggref->aggorder));
+
+			if (orderClause->tleSortGroupRef != curTarget->ressortgroupref)
+				elog(ERROR, "Aggregate order clause isn't found in target list");
+
+			if (orderClause->sortop != aggsortop)
+			{
+				List	   *btclasses;
+				ListCell   *lc;
+
+				btclasses = get_op_btree_interpretation(orderClause->sortop);
+
+				foreach(lc, btclasses)
+				{
+					OpBtreeInterpretation *interpretation;
+
+					interpretation = (OpBtreeInterpretation *) lfirst(lc);
+					if (op_in_opfamily(aggsortop, interpretation->opfamily_id))
+					{
+						aggref->aggorder = NIL;
+						break;
+					}
+				}
+
+				list_free_deep(btclasses);
+			}
+			else
+				aggref->aggorder = NIL;
+		}
+	}
+
+	PG_RETURN_POINTER(NULL);
+}
diff --git a/src/backend/optimizer/prep/prepagg.c b/src/backend/optimizer/prep/prepagg.c
index e935c9d094..74347b3d7e 100644
--- a/src/backend/optimizer/prep/prepagg.c
+++ b/src/backend/optimizer/prep/prepagg.c
@@ -39,6 +39,7 @@
 #include "catalog/pg_type.h"
 #include "nodes/nodeFuncs.h"
 #include "nodes/pathnodes.h"
+#include "nodes/supportnodes.h"
 #include "optimizer/cost.h"
 #include "optimizer/optimizer.h"
 #include "optimizer/plancat.h"
@@ -64,6 +65,25 @@ static int	find_compatible_trans(PlannerInfo *root, Aggref *newagg,
 								  List *transnos);
 static Datum GetAggInitVal(Datum textInitVal, Oid transtype);
 
+static void
+optimize_aggregates(Aggref *aggref)
+{
+	SupportRequestAggregate	req;
+	Oid						prosupport;
+
+	prosupport = get_func_support(aggref->aggfnoid);
+
+	/* Check if there's a support function for the aggregate */
+	if (!OidIsValid(prosupport))
+		return;
+
+
+	req.type = T_SupportRequestAggregate;
+	req.aggref = aggref;
+	/* call the support function */
+	(void) OidFunctionCall1(prosupport, PointerGetDatum(&req));
+}
+
 /* -----------------
  * Resolve the transition type of all Aggrefs, and determine which Aggrefs
  * can share aggregate or transition state.
@@ -215,6 +235,12 @@ preprocess_aggref(Aggref *aggref, PlannerInfo *root)
 
 	ReleaseSysCache(aggTuple);
 
+	/*
+	 * See if any modifications can be made to each aggregate to allow
+	 * planner to process it in more effective way.
+	 */
+	optimize_aggregates(aggref);
+
 	/*
 	 * 1. See if this is identical to another aggregate function call that
 	 * we've seen already.
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 2dcc2d42da..57a939ec60 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -6834,155 +6834,159 @@
   proname => 'sum', prokind => 'a', proisstrict => 'f', prorettype => 'numeric',
   proargtypes => 'numeric', prosrc => 'aggregate_dummy' },
 
-{ oid => '2115', descr => 'maximum value of all bigint input values',
+{ oid => '2775', descr => 'planner support for min and max aggregates',
+  proname => 'minmax_support', prorettype => 'internal',
+  proargtypes => 'internal', prosrc => 'minmax_support' },
+{ oid => '2115', prosupport => 'minmax_support',
+  descr => 'maximum value of all bigint input values',
   proname => 'max', prokind => 'a', proisstrict => 'f', prorettype => 'int8',
   proargtypes => 'int8', prosrc => 'aggregate_dummy' },
-{ oid => '2116', descr => 'maximum value of all integer input values',
+{ oid => '2116', prosupport => 'minmax_support', descr => 'maximum value of all integer input values',
   proname => 'max', prokind => 'a', proisstrict => 'f', prorettype => 'int4',
   proargtypes => 'int4', prosrc => 'aggregate_dummy' },
-{ oid => '2117', descr => 'maximum value of all smallint input values',
+{ oid => '2117', prosupport => 'minmax_support', descr => 'maximum value of all smallint input values',
   proname => 'max', prokind => 'a', proisstrict => 'f', prorettype => 'int2',
   proargtypes => 'int2', prosrc => 'aggregate_dummy' },
-{ oid => '2118', descr => 'maximum value of all oid input values',
+{ oid => '2118', prosupport => 'minmax_support', descr => 'maximum value of all oid input values',
   proname => 'max', prokind => 'a', proisstrict => 'f', prorettype => 'oid',
   proargtypes => 'oid', prosrc => 'aggregate_dummy' },
-{ oid => '2119', descr => 'maximum value of all float4 input values',
+{ oid => '2119', prosupport => 'minmax_support', descr => 'maximum value of all float4 input values',
   proname => 'max', prokind => 'a', proisstrict => 'f', prorettype => 'float4',
   proargtypes => 'float4', prosrc => 'aggregate_dummy' },
-{ oid => '2120', descr => 'maximum value of all float8 input values',
+{ oid => '2120', prosupport => 'minmax_support', descr => 'maximum value of all float8 input values',
   proname => 'max', prokind => 'a', proisstrict => 'f', prorettype => 'float8',
   proargtypes => 'float8', prosrc => 'aggregate_dummy' },
-{ oid => '2122', descr => 'maximum value of all date input values',
+{ oid => '2122', prosupport => 'minmax_support', descr => 'maximum value of all date input values',
   proname => 'max', prokind => 'a', proisstrict => 'f', prorettype => 'date',
   proargtypes => 'date', prosrc => 'aggregate_dummy' },
-{ oid => '2123', descr => 'maximum value of all time input values',
+{ oid => '2123', prosupport => 'minmax_support', descr => 'maximum value of all time input values',
   proname => 'max', prokind => 'a', proisstrict => 'f', prorettype => 'time',
   proargtypes => 'time', prosrc => 'aggregate_dummy' },
-{ oid => '2124',
+{ oid => '2124', prosupport => 'minmax_support',
   descr => 'maximum value of all time with time zone input values',
   proname => 'max', prokind => 'a', proisstrict => 'f', prorettype => 'timetz',
   proargtypes => 'timetz', prosrc => 'aggregate_dummy' },
-{ oid => '2125', descr => 'maximum value of all money input values',
+{ oid => '2125', prosupport => 'minmax_support', descr => 'maximum value of all money input values',
   proname => 'max', prokind => 'a', proisstrict => 'f', prorettype => 'money',
   proargtypes => 'money', prosrc => 'aggregate_dummy' },
-{ oid => '2126', descr => 'maximum value of all timestamp input values',
+{ oid => '2126', prosupport => 'minmax_support', descr => 'maximum value of all timestamp input values',
   proname => 'max', prokind => 'a', proisstrict => 'f',
   prorettype => 'timestamp', proargtypes => 'timestamp',
   prosrc => 'aggregate_dummy' },
-{ oid => '2127',
+{ oid => '2127', prosupport => 'minmax_support',
   descr => 'maximum value of all timestamp with time zone input values',
   proname => 'max', prokind => 'a', proisstrict => 'f',
   prorettype => 'timestamptz', proargtypes => 'timestamptz',
   prosrc => 'aggregate_dummy' },
-{ oid => '2128', descr => 'maximum value of all interval input values',
+{ oid => '2128', prosupport => 'minmax_support', descr => 'maximum value of all interval input values',
   proname => 'max', prokind => 'a', proisstrict => 'f',
   prorettype => 'interval', proargtypes => 'interval',
   prosrc => 'aggregate_dummy' },
-{ oid => '2129', descr => 'maximum value of all text input values',
+{ oid => '2129', prosupport => 'minmax_support', descr => 'maximum value of all text input values',
   proname => 'max', prokind => 'a', proisstrict => 'f', prorettype => 'text',
   proargtypes => 'text', prosrc => 'aggregate_dummy' },
 { oid => '2130', descr => 'maximum value of all numeric input values',
   proname => 'max', prokind => 'a', proisstrict => 'f', prorettype => 'numeric',
   proargtypes => 'numeric', prosrc => 'aggregate_dummy' },
-{ oid => '2050', descr => 'maximum value of all anyarray input values',
+{ oid => '2050', prosupport => 'minmax_support', descr => 'maximum value of all anyarray input values',
   proname => 'max', prokind => 'a', proisstrict => 'f',
   prorettype => 'anyarray', proargtypes => 'anyarray',
   prosrc => 'aggregate_dummy' },
-{ oid => '8595', descr => 'maximum value of all record input values',
+{ oid => '8595', prosupport => 'minmax_support', descr => 'maximum value of all record input values',
   proname => 'max', prokind => 'a', proisstrict => 'f', prorettype => 'record',
   proargtypes => 'record', prosrc => 'aggregate_dummy' },
-{ oid => '2244', descr => 'maximum value of all bpchar input values',
+{ oid => '2244', prosupport => 'minmax_support', descr => 'maximum value of all bpchar input values',
   proname => 'max', prokind => 'a', proisstrict => 'f', prorettype => 'bpchar',
   proargtypes => 'bpchar', prosrc => 'aggregate_dummy' },
-{ oid => '2797', descr => 'maximum value of all tid input values',
+{ oid => '2797', prosupport => 'minmax_support', descr => 'maximum value of all tid input values',
   proname => 'max', prokind => 'a', proisstrict => 'f', prorettype => 'tid',
   proargtypes => 'tid', prosrc => 'aggregate_dummy' },
-{ oid => '3564', descr => 'maximum value of all inet input values',
+{ oid => '3564', prosupport => 'minmax_support', descr => 'maximum value of all inet input values',
   proname => 'max', prokind => 'a', proisstrict => 'f', prorettype => 'inet',
   proargtypes => 'inet', prosrc => 'aggregate_dummy' },
-{ oid => '4189', descr => 'maximum value of all pg_lsn input values',
+{ oid => '4189', prosupport => 'minmax_support', descr => 'maximum value of all pg_lsn input values',
   proname => 'max', prokind => 'a', proisstrict => 'f', prorettype => 'pg_lsn',
   proargtypes => 'pg_lsn', prosrc => 'aggregate_dummy' },
-{ oid => '5099', descr => 'maximum value of all xid8 input values',
+{ oid => '5099', prosupport => 'minmax_support', descr => 'maximum value of all xid8 input values',
   proname => 'max', prokind => 'a', proisstrict => 'f', prorettype => 'xid8',
   proargtypes => 'xid8', prosrc => 'aggregate_dummy' },
-{ oid => '8922', descr => 'maximum value of all bytea input values',
+{ oid => '8922', prosupport => 'minmax_support', descr => 'maximum value of all bytea input values',
   proname => 'max', prokind => 'a', proisstrict => 'f', prorettype => 'bytea',
   proargtypes => 'bytea', prosrc => 'aggregate_dummy' },
 
-{ oid => '2131', descr => 'minimum value of all bigint input values',
+{ oid => '2131', prosupport => 'minmax_support', descr => 'minimum value of all bigint input values',
   proname => 'min', prokind => 'a', proisstrict => 'f', prorettype => 'int8',
   proargtypes => 'int8', prosrc => 'aggregate_dummy' },
-{ oid => '2132', descr => 'minimum value of all integer input values',
+{ oid => '2132', prosupport => 'minmax_support', descr => 'minimum value of all integer input values',
   proname => 'min', prokind => 'a', proisstrict => 'f', prorettype => 'int4',
   proargtypes => 'int4', prosrc => 'aggregate_dummy' },
-{ oid => '2133', descr => 'minimum value of all smallint input values',
+{ oid => '2133', prosupport => 'minmax_support', descr => 'minimum value of all smallint input values',
   proname => 'min', prokind => 'a', proisstrict => 'f', prorettype => 'int2',
   proargtypes => 'int2', prosrc => 'aggregate_dummy' },
-{ oid => '2134', descr => 'minimum value of all oid input values',
+{ oid => '2134', prosupport => 'minmax_support', descr => 'minimum value of all oid input values',
   proname => 'min', prokind => 'a', proisstrict => 'f', prorettype => 'oid',
   proargtypes => 'oid', prosrc => 'aggregate_dummy' },
-{ oid => '2135', descr => 'minimum value of all float4 input values',
+{ oid => '2135', prosupport => 'minmax_support', descr => 'minimum value of all float4 input values',
   proname => 'min', prokind => 'a', proisstrict => 'f', prorettype => 'float4',
   proargtypes => 'float4', prosrc => 'aggregate_dummy' },
-{ oid => '2136', descr => 'minimum value of all float8 input values',
+{ oid => '2136', prosupport => 'minmax_support', descr => 'minimum value of all float8 input values',
   proname => 'min', prokind => 'a', proisstrict => 'f', prorettype => 'float8',
   proargtypes => 'float8', prosrc => 'aggregate_dummy' },
-{ oid => '2138', descr => 'minimum value of all date input values',
+{ oid => '2138', prosupport => 'minmax_support', descr => 'minimum value of all date input values',
   proname => 'min', prokind => 'a', proisstrict => 'f', prorettype => 'date',
   proargtypes => 'date', prosrc => 'aggregate_dummy' },
-{ oid => '2139', descr => 'minimum value of all time input values',
+{ oid => '2139', prosupport => 'minmax_support', descr => 'minimum value of all time input values',
   proname => 'min', prokind => 'a', proisstrict => 'f', prorettype => 'time',
   proargtypes => 'time', prosrc => 'aggregate_dummy' },
-{ oid => '2140',
+{ oid => '2140', prosupport => 'minmax_support',
   descr => 'minimum value of all time with time zone input values',
   proname => 'min', prokind => 'a', proisstrict => 'f', prorettype => 'timetz',
   proargtypes => 'timetz', prosrc => 'aggregate_dummy' },
-{ oid => '2141', descr => 'minimum value of all money input values',
+{ oid => '2141', prosupport => 'minmax_support', descr => 'minimum value of all money input values',
   proname => 'min', prokind => 'a', proisstrict => 'f', prorettype => 'money',
   proargtypes => 'money', prosrc => 'aggregate_dummy' },
-{ oid => '2142', descr => 'minimum value of all timestamp input values',
+{ oid => '2142', prosupport => 'minmax_support', descr => 'minimum value of all timestamp input values',
   proname => 'min', prokind => 'a', proisstrict => 'f',
   prorettype => 'timestamp', proargtypes => 'timestamp',
   prosrc => 'aggregate_dummy' },
-{ oid => '2143',
+{ oid => '2143', prosupport => 'minmax_support',
   descr => 'minimum value of all timestamp with time zone input values',
   proname => 'min', prokind => 'a', proisstrict => 'f',
   prorettype => 'timestamptz', proargtypes => 'timestamptz',
   prosrc => 'aggregate_dummy' },
-{ oid => '2144', descr => 'minimum value of all interval input values',
+{ oid => '2144', prosupport => 'minmax_support', descr => 'minimum value of all interval input values',
   proname => 'min', prokind => 'a', proisstrict => 'f',
   prorettype => 'interval', proargtypes => 'interval',
   prosrc => 'aggregate_dummy' },
-{ oid => '2145', descr => 'minimum value of all text values',
+{ oid => '2145', prosupport => 'minmax_support', descr => 'minimum value of all text values',
   proname => 'min', prokind => 'a', proisstrict => 'f', prorettype => 'text',
   proargtypes => 'text', prosrc => 'aggregate_dummy' },
 { oid => '2146', descr => 'minimum value of all numeric input values',
   proname => 'min', prokind => 'a', proisstrict => 'f', prorettype => 'numeric',
   proargtypes => 'numeric', prosrc => 'aggregate_dummy' },
-{ oid => '2051', descr => 'minimum value of all anyarray input values',
+{ oid => '2051', prosupport => 'minmax_support', descr => 'minimum value of all anyarray input values',
   proname => 'min', prokind => 'a', proisstrict => 'f',
   prorettype => 'anyarray', proargtypes => 'anyarray',
   prosrc => 'aggregate_dummy' },
-{ oid => '8596', descr => 'minimum value of all record input values',
+{ oid => '8596', prosupport => 'minmax_support', descr => 'minimum value of all record input values',
   proname => 'min', prokind => 'a', proisstrict => 'f', prorettype => 'record',
   proargtypes => 'record', prosrc => 'aggregate_dummy' },
-{ oid => '2245', descr => 'minimum value of all bpchar input values',
+{ oid => '2245', prosupport => 'minmax_support', descr => 'minimum value of all bpchar input values',
   proname => 'min', prokind => 'a', proisstrict => 'f', prorettype => 'bpchar',
   proargtypes => 'bpchar', prosrc => 'aggregate_dummy' },
-{ oid => '2798', descr => 'minimum value of all tid input values',
+{ oid => '2798', prosupport => 'minmax_support', descr => 'minimum value of all tid input values',
   proname => 'min', prokind => 'a', proisstrict => 'f', prorettype => 'tid',
   proargtypes => 'tid', prosrc => 'aggregate_dummy' },
-{ oid => '3565', descr => 'minimum value of all inet input values',
+{ oid => '3565', prosupport => 'minmax_support', descr => 'minimum value of all inet input values',
   proname => 'min', prokind => 'a', proisstrict => 'f', prorettype => 'inet',
   proargtypes => 'inet', prosrc => 'aggregate_dummy' },
-{ oid => '4190', descr => 'minimum value of all pg_lsn input values',
+{ oid => '4190', prosupport => 'minmax_support', descr => 'minimum value of all pg_lsn input values',
   proname => 'min', prokind => 'a', proisstrict => 'f', prorettype => 'pg_lsn',
   proargtypes => 'pg_lsn', prosrc => 'aggregate_dummy' },
-{ oid => '5100', descr => 'minimum value of all xid8 input values',
+{ oid => '5100', prosupport => 'minmax_support', descr => 'minimum value of all xid8 input values',
   proname => 'min', prokind => 'a', proisstrict => 'f', prorettype => 'xid8',
   proargtypes => 'xid8', prosrc => 'aggregate_dummy' },
-{ oid => '8923', descr => 'minimum value of all bytea input values',
+{ oid => '8923', prosupport => 'minmax_support', descr => 'minimum value of all bytea input values',
   proname => 'min', prokind => 'a', proisstrict => 'f', prorettype => 'bytea',
   proargtypes => 'bytea', prosrc => 'aggregate_dummy' },
 
diff --git a/src/include/nodes/supportnodes.h b/src/include/nodes/supportnodes.h
index 5f7bcde891..5b9a14c855 100644
--- a/src/include/nodes/supportnodes.h
+++ b/src/include/nodes/supportnodes.h
@@ -343,4 +343,11 @@ typedef struct SupportRequestOptimizeWindowClause
 								 * optimizations are possible. */
 } SupportRequestOptimizeWindowClause;
 
+typedef struct SupportRequestAggregate
+{
+	NodeTag		type;
+
+	Aggref	   *aggref;
+} SupportRequestAggregate;
+
 #endif							/* SUPPORTNODES_H */
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index f2fb66388c..cd5cac4f52 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1003,6 +1003,135 @@ select max(unique1) from tenk1 where unique1 > 42;
  9999
 (1 row)
 
+-- When sorting on the column that's being aggregated, indexes can also be
+-- used, but only when the aggregate's operator has the same ordering behavior
+-- as the ORDER BY-clause, i.e. if it is in the same btree opclass as the one
+-- chosen for the ORDER BY clause.
+explain (costs off)
+  select min(unique1 ORDER BY unique1 ASC NULLS LAST) from tenk1;
+                         QUERY PLAN                         
+------------------------------------------------------------
+ Result
+   InitPlan 1
+     ->  Limit
+           ->  Index Only Scan using tenk1_unique1 on tenk1
+                 Index Cond: (unique1 IS NOT NULL)
+(5 rows)
+
+select min(unique1 ORDER BY unique1 ASC NULLS LAST) from tenk1;
+ min 
+-----
+   0
+(1 row)
+
+explain (costs off)
+  select max(unique1 ORDER BY unique1 USING <) from tenk1;
+                             QUERY PLAN                              
+---------------------------------------------------------------------
+ Result
+   InitPlan 1
+     ->  Limit
+           ->  Index Only Scan Backward using tenk1_unique1 on tenk1
+                 Index Cond: (unique1 IS NOT NULL)
+(5 rows)
+
+select max(unique1 ORDER BY unique1 USING <) from tenk1;
+ max  
+------
+ 9999
+(1 row)
+
+explain (costs off)
+  select max(unique1 ORDER BY tenthous) from tenk1;
+          QUERY PLAN           
+-------------------------------
+ Aggregate
+   ->  Sort
+         Sort Key: tenthous
+         ->  Seq Scan on tenk1
+(4 rows)
+
+select max(unique1 ORDER BY tenthous) from tenk1;
+ max  
+------
+ 9999
+(1 row)
+
+CREATE OPERATOR @>@ (function=int4gt, leftarg=int4, rightarg=int4);
+CREATE OPERATOR @<@ (function=int4lt, leftarg=int4, rightarg=int4);
+CREATE OPERATOR @=@ (function=int4eq, leftarg=int4, rightarg=int4);
+CREATE OPERATOR FAMILY my_family USING btree;
+CREATE OPERATOR CLASS my_int_ops
+  FOR TYPE int
+  USING btree FAMILY my_family AS
+  OPERATOR 1 @<@ FOR SEARCH,
+  OPERATOR 5 @>@ FOR SEARCH,
+  OPERATOR 3 @=@,
+  FUNCTION 1 btint4cmp;
+CREATE AGGREGATE my_int_max (
+  BASETYPE = int4,
+  SFUNC = int4larger,
+  STYPE = int4,
+  SORTOP = @>@
+);
+-- Dirty hack in absence of prosupport declaration in the 'CREATE AGGREGATE'
+-- statement.
+UPDATE pg_proc SET prosupport = 'minmax_support'::regproc
+  WHERE proname = 'my_int_max';
+CREATE TABLE grouping_prosupport_test (x integer);
+SET enable_seqscan = 'off'; -- Avoid time consuming data generation
+CREATE INDEX idx_int4 ON grouping_prosupport_test (x my_int_ops);
+VACUUM ANALYZE grouping_prosupport_test;
+EXPLAIN (VERBOSE, COSTS OFF)
+SELECT my_int_max(x::int4 ORDER BY x::int4 USING @<@ )
+FROM grouping_prosupport_test;
+                                        QUERY PLAN                                        
+------------------------------------------------------------------------------------------
+ Result
+   Output: (InitPlan 1).col1
+   InitPlan 1
+     ->  Limit
+           Output: grouping_prosupport_test.x
+           ->  Index Only Scan Backward using idx_int4 on public.grouping_prosupport_test
+                 Output: grouping_prosupport_test.x
+                 Index Cond: (grouping_prosupport_test.x IS NOT NULL)
+(8 rows)
+
+EXPLAIN (VERBOSE, COSTS OFF)
+SELECT my_int_max(x::int4 ORDER BY x::int4 USING @>@ )
+FROM grouping_prosupport_test;
+                                        QUERY PLAN                                        
+------------------------------------------------------------------------------------------
+ Result
+   Output: (InitPlan 1).col1
+   InitPlan 1
+     ->  Limit
+           Output: grouping_prosupport_test.x
+           ->  Index Only Scan Backward using idx_int4 on public.grouping_prosupport_test
+                 Output: grouping_prosupport_test.x
+                 Index Cond: (grouping_prosupport_test.x IS NOT NULL)
+(8 rows)
+
+RESET enable_seqscan;
+DROP AGGREGATE my_int_max(int4);
+DROP OPERATOR CLASS my_int_ops USING btree CASCADE;
+NOTICE:  drop cascades to index idx_int4
+DROP OPERATOR FAMILY my_family USING btree;
+DROP OPERATOR @>@ (int4, int4);
+DROP OPERATOR @<@ (int4, int4);
+DROP OPERATOR @=@ (int4, int4);
+DROP TABLE grouping_prosupport_test;
+--  But even then, the index can't be used if we order by multiple columns.
+explain (costs off)
+  select max(unique1 ORDER BY unique1, tenthous) from tenk1;
+             QUERY PLAN              
+-------------------------------------
+ Aggregate
+   ->  Sort
+         Sort Key: unique1, tenthous
+         ->  Seq Scan on tenk1
+(4 rows)
+
 -- the planner may choose a generic aggregate here if parallel query is
 -- enabled, since that plan will be parallel safe and the "optimized"
 -- plan, which has almost identical cost, will not be.  we want to test
@@ -1573,7 +1702,7 @@ from tenk1;
 -------------------------------
  Aggregate
    ->  Sort
-         Sort Key: four
+         Sort Key: two
          ->  Seq Scan on tenk1
 (4 rows)
 
@@ -1602,7 +1731,7 @@ from tenk1;
 -------------------------------
  Aggregate
    ->  Sort
-         Sort Key: four
+         Sort Key: two
          ->  Seq Scan on tenk1
 (4 rows)
 
@@ -1618,7 +1747,7 @@ from tenk1;
 -------------------------------
  Aggregate
    ->  Sort
-         Sort Key: ten
+         Sort Key: two
          ->  Seq Scan on tenk1
 (4 rows)
 
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 77168bcc74..8dd8503bb8 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -368,6 +368,68 @@ explain (costs off)
   select max(unique1) from tenk1 where unique1 > 42;
 select max(unique1) from tenk1 where unique1 > 42;
 
+-- When sorting on the column that's being aggregated, indexes can also be
+-- used, but only when the aggregate's operator has the same ordering behavior
+-- as the ORDER BY-clause, i.e. if it is in the same btree opclass as the one
+-- chosen for the ORDER BY clause.
+explain (costs off)
+  select min(unique1 ORDER BY unique1 ASC NULLS LAST) from tenk1;
+select min(unique1 ORDER BY unique1 ASC NULLS LAST) from tenk1;
+explain (costs off)
+  select max(unique1 ORDER BY unique1 USING <) from tenk1;
+select max(unique1 ORDER BY unique1 USING <) from tenk1;
+explain (costs off)
+  select max(unique1 ORDER BY tenthous) from tenk1;
+select max(unique1 ORDER BY tenthous) from tenk1;
+
+CREATE OPERATOR @>@ (function=int4gt, leftarg=int4, rightarg=int4);
+CREATE OPERATOR @<@ (function=int4lt, leftarg=int4, rightarg=int4);
+CREATE OPERATOR @=@ (function=int4eq, leftarg=int4, rightarg=int4);
+
+CREATE OPERATOR FAMILY my_family USING btree;
+CREATE OPERATOR CLASS my_int_ops
+  FOR TYPE int
+  USING btree FAMILY my_family AS
+  OPERATOR 1 @<@ FOR SEARCH,
+  OPERATOR 5 @>@ FOR SEARCH,
+  OPERATOR 3 @=@,
+  FUNCTION 1 btint4cmp;
+
+CREATE AGGREGATE my_int_max (
+  BASETYPE = int4,
+  SFUNC = int4larger,
+  STYPE = int4,
+  SORTOP = @>@
+);
+
+-- Dirty hack in absence of prosupport declaration in the 'CREATE AGGREGATE'
+-- statement.
+UPDATE pg_proc SET prosupport = 'minmax_support'::regproc
+  WHERE proname = 'my_int_max';
+CREATE TABLE grouping_prosupport_test (x integer);
+SET enable_seqscan = 'off'; -- Avoid time consuming data generation
+CREATE INDEX idx_int4 ON grouping_prosupport_test (x my_int_ops);
+VACUUM ANALYZE grouping_prosupport_test;
+EXPLAIN (VERBOSE, COSTS OFF)
+SELECT my_int_max(x::int4 ORDER BY x::int4 USING @<@ )
+FROM grouping_prosupport_test;
+EXPLAIN (VERBOSE, COSTS OFF)
+SELECT my_int_max(x::int4 ORDER BY x::int4 USING @>@ )
+FROM grouping_prosupport_test;
+
+RESET enable_seqscan;
+DROP AGGREGATE my_int_max(int4);
+DROP OPERATOR CLASS my_int_ops USING btree CASCADE;
+DROP OPERATOR FAMILY my_family USING btree;
+DROP OPERATOR @>@ (int4, int4);
+DROP OPERATOR @<@ (int4, int4);
+DROP OPERATOR @=@ (int4, int4);
+DROP TABLE grouping_prosupport_test;
+
+--  But even then, the index can't be used if we order by multiple columns.
+explain (costs off)
+  select max(unique1 ORDER BY unique1, tenthous) from tenk1;
+
 -- the planner may choose a generic aggregate here if parallel query is
 -- enabled, since that plan will be parallel safe and the "optimized"
 -- plan, which has almost identical cost, will not be.  we want to test
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index e1c4f913f8..53b5136538 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -2793,6 +2793,7 @@ SubscriptionRelState
 SummarizerReadLocalXLogPrivate
 SupportRequestCost
 SupportRequestIndexCondition
+SupportRequestAggregate
 SupportRequestOptimizeWindowClause
 SupportRequestRows
 SupportRequestSelectivity
-- 
2.39.5

Re: Expand applicability of aggregate's sortop optimization

Reply via email to