On 17/7/2024 16:33, Matthias van de Meent wrote:
On Wed, 17 Jul 2024 at 05:29, Andrei Lepikhov <lepi...@gmail.com> wrote:
As I see, the code:
aggsortop = fetch_agg_sort_op(aggref->aggfnoid);
if (!OidIsValid(aggsortop))
return false; /* not a MIN/MAX aggregate */
used twice and can be evaluated earlier to avoid duplicated code.
The code is structured like this to make sure we only start accessing
catalogs once we know that all other reasons to bail out from this
optimization indicate we can apply the opimization. You'll notice that
I've tried to put the cheapest checks that only use caller-supplied
information first, and catalog accesses only after that.
After additional research I think I get the key misunderstanding why you
did so:
As I see, the checks:
if (list_length(aggref->aggorder) > 1)
return false;
if (orderClause->tleSortGroupRef != curTarget->ressortgroupref)
return false;
not needed at all. You already have check:
if (list_length(aggref->args) != 1)
and this tells us, that if we have ordering like MIN(x ORDER BY <smth>),
this <smth> ordering contains only aggregate argument x. Because if it
contained some expression, the transformAggregateCall() would add this
expression to agg->args by calling the transformSortClause() routine.
The tleSortGroupRef is just exactly ressortgroupref - no need to recheck
it one more time. Of course, it is suitable only for MIN/MAX aggregates,
but we discuss only them right now. Am I wrong?
If you want, you can place it as assertions (see the diff in attachment).
--
regards, Andrei Lepikhov
diff --git a/src/backend/optimizer/plan/planagg.c
b/src/backend/optimizer/plan/planagg.c
index afb5445b77..e0cbe5c923 100644
--- a/src/backend/optimizer/plan/planagg.c
+++ b/src/backend/optimizer/plan/planagg.c
@@ -253,6 +253,20 @@ can_minmax_aggs(PlannerInfo *root, List **context)
if (list_length(aggref->args) != 1)
return false; /* it couldn't be MIN/MAX */
+ /*
+ * We might implement the optimization when a FILTER clause is
present
+ * by adding the filter to the quals of the generated subquery.
For
+ * now, just punt.
+ */
+ if (aggref->aggfilter != NULL)
+ return false;
+
+ curTarget = (TargetEntry *) linitial(aggref->args);
+
+ aggsortop = fetch_agg_sort_op(aggref->aggfnoid);
+ if (!OidIsValid(aggsortop))
+ return false; /* not a MIN/MAX aggregate */
+
/*
* ORDER BY is usually irrelevant for MIN/MAX, but it can
change the
* outcome if the aggsortop's operator class recognizes
non-identical
@@ -267,22 +281,35 @@ can_minmax_aggs(PlannerInfo *root, List **context)
* quickly.
*/
if (aggref->aggorder != NIL)
- return false;
- /* note: we do not care if DISTINCT is mentioned ... */
+ {
+ SortGroupClause *orderClause;
- /*
- * We might implement the optimization when a FILTER clause is
present
- * by adding the filter to the quals of the generated subquery.
For
- * now, just punt.
- */
- if (aggref->aggfilter != NULL)
- return false;
+ /*
+ * If the order clause is the same column as the one
we're
+ * aggregating, we can still use the index: It is
undefined which
+ * value is MIN() or MAX(), as well as which value is
first or
+ * last when sorted. So, we can still use the index IFF
the
+ * aggregated expression equals the expression used in
the
+ * ordering operation.
+ */
- aggsortop = fetch_agg_sort_op(aggref->aggfnoid);
- if (!OidIsValid(aggsortop))
- return false; /* not a MIN/MAX aggregate */
+ /*
+ * We only accept a single argument to min/max
aggregates,
+ * orderings that have more clauses won't provide
correct results.
+ */
+ Assert(list_length(aggref->aggorder) == 1);
+
+ orderClause = castNode(SortGroupClause,
linitial(aggref->aggorder));
+
+ Assert(orderClause->tleSortGroupRef ==
curTarget->ressortgroupref);
+
+ if (orderClause->sortop != aggsortop &&
+ orderClause->sortop !=
get_commutator(aggsortop))
+ return false;
+ }
+
+ /* note: we do not care if DISTINCT is mentioned ... */
- curTarget = (TargetEntry *) linitial(aggref->args);
if (contain_mutable_functions((Node *) curTarget->expr))
return false; /* not potentially indexable */
diff --git a/src/test/regress/expected/aggregates.out
b/src/test/regress/expected/aggregates.out
index a5596ab210..5d37510d64 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1003,6 +1003,71 @@ select max(unique1) from tenk1 where unique1 > 42;
9999
(1 row)
+-- When sorting on the column that's being aggregated, indexes can also be
+-- used, but only when the aggregate's operator has the same ordering behavior
+-- as the ORDER BY-clause, i.e. if it is in the same btree opclass as the one
+-- chosen for the ORDER BY clause.
+explain (costs off)
+ select min(unique1 ORDER BY unique1 ASC NULLS LAST) from tenk1;
+ QUERY PLAN
+------------------------------------------------------------
+ Result
+ InitPlan 1
+ -> Limit
+ -> Index Only Scan using tenk1_unique1 on tenk1
+ Index Cond: (unique1 IS NOT NULL)
+(5 rows)
+
+select min(unique1 ORDER BY unique1 ASC NULLS LAST) from tenk1;
+ min
+-----
+ 0
+(1 row)
+
+explain (costs off)
+ select max(unique1 ORDER BY unique1 USING <) from tenk1;
+ QUERY PLAN
+---------------------------------------------------------------------
+ Result
+ InitPlan 1
+ -> Limit
+ -> Index Only Scan Backward using tenk1_unique1 on tenk1
+ Index Cond: (unique1 IS NOT NULL)
+(5 rows)
+
+select max(unique1 ORDER BY unique1 USING <) from tenk1;
+ max
+------
+ 9999
+(1 row)
+
+explain (costs off)
+ select max(unique1 ORDER BY tenthous) from tenk1;
+ QUERY PLAN
+-------------------------------
+ Aggregate
+ -> Sort
+ Sort Key: tenthous
+ -> Seq Scan on tenk1
+(4 rows)
+
+select max(unique1 ORDER BY tenthous) from tenk1;
+ max
+------
+ 9999
+(1 row)
+
+-- But even then, the index can't be used if we order by multiple columns.
+explain (costs off)
+ select max(unique1 ORDER BY unique1, tenthous) from tenk1;
+ QUERY PLAN
+-------------------------------------
+ Aggregate
+ -> Sort
+ Sort Key: unique1, tenthous
+ -> Seq Scan on tenk1
+(4 rows)
+
-- the planner may choose a generic aggregate here if parallel query is
-- enabled, since that plan will be parallel safe and the "optimized"
-- plan, which has almost identical cost, will not be. we want to test
diff --git a/src/test/regress/sql/aggregates.sql
b/src/test/regress/sql/aggregates.sql
index ca6d1bcfb7..5cdf336fc0 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -368,6 +368,24 @@ explain (costs off)
select max(unique1) from tenk1 where unique1 > 42;
select max(unique1) from tenk1 where unique1 > 42;
+-- When sorting on the column that's being aggregated, indexes can also be
+-- used, but only when the aggregate's operator has the same ordering behavior
+-- as the ORDER BY-clause, i.e. if it is in the same btree opclass as the one
+-- chosen for the ORDER BY clause.
+explain (costs off)
+ select min(unique1 ORDER BY unique1 ASC NULLS LAST) from tenk1;
+select min(unique1 ORDER BY unique1 ASC NULLS LAST) from tenk1;
+explain (costs off)
+ select max(unique1 ORDER BY unique1 USING <) from tenk1;
+select max(unique1 ORDER BY unique1 USING <) from tenk1;
+explain (costs off)
+ select max(unique1 ORDER BY tenthous) from tenk1;
+select max(unique1 ORDER BY tenthous) from tenk1;
+
+-- But even then, the index can't be used if we order by multiple columns.
+explain (costs off)
+ select max(unique1 ORDER BY unique1, tenthous) from tenk1;
+
-- the planner may choose a generic aggregate here if parallel query is
-- enabled, since that plan will be parallel safe and the "optimized"
-- plan, which has almost identical cost, will not be. we want to test