On 18/7/2024 19:49, Matthias van de Meent wrote:
On Wed, 17 Jul 2024 at 16:09, Andrei Lepikhov <lepi...@gmail.com> wrote:
On 17/7/2024 16:33, Matthias van de Meent wrote:
On Wed, 17 Jul 2024 at 05:29, Andrei Lepikhov <lepi...@gmail.com> wrote:
Because the @<@ and @>@ operators are not registered as commutative,
it couldn't apply the optimization in your patch, while the btree
operator check does allow it to apply the optimization.
Ok, I got it.
And next issue: I think it would be better to save cycles than to free
some piece of memory, so why not to break the foreach cycle if you
already matched the opfamily?
Also, in the patch attached I added your smoothed test to the aggregates.sql
Aside: Arguably, checking for commutator operators would not be
incorrect when looking at it from "applied operators" point of view,
but if that commutative operator isn't registered as opposite ordering
of the same btree opclass, then we'd probably break some assumptions
of some aggregate's sortop - it could be registered with another
opclass, and that could cause us to select a different btree opclass
(thus: ordering) than is indicated to be required by the aggregate;
the thing we're trying to protect against hereYes, I also think if someone doesn't register < as a commutator to >, it
may mean they do it intentionally: may be it is a bit different
sortings? - this subject is too far from my experience and I can agree
with your approach.
--
regards, Andrei Lepikhov
diff --git a/src/backend/optimizer/plan/planagg.c
b/src/backend/optimizer/plan/planagg.c
index afb5445b77..9b1d6f4e43 100644
--- a/src/backend/optimizer/plan/planagg.c
+++ b/src/backend/optimizer/plan/planagg.c
@@ -253,6 +253,20 @@ can_minmax_aggs(PlannerInfo *root, List **context)
if (list_length(aggref->args) != 1)
return false; /* it couldn't be MIN/MAX */
+ /*
+ * We might implement the optimization when a FILTER clause is
present
+ * by adding the filter to the quals of the generated subquery.
For
+ * now, just punt.
+ */
+ if (aggref->aggfilter != NULL)
+ return false;
+
+ curTarget = (TargetEntry *) linitial(aggref->args);
+
+ aggsortop = fetch_agg_sort_op(aggref->aggfnoid);
+ if (!OidIsValid(aggsortop))
+ return false; /* not a MIN/MAX aggregate */
+
/*
* ORDER BY is usually irrelevant for MIN/MAX, but it can
change the
* outcome if the aggsortop's operator class recognizes
non-identical
@@ -267,22 +281,55 @@ can_minmax_aggs(PlannerInfo *root, List **context)
* quickly.
*/
if (aggref->aggorder != NIL)
- return false;
- /* note: we do not care if DISTINCT is mentioned ... */
-
- /*
- * We might implement the optimization when a FILTER clause is
present
- * by adding the filter to the quals of the generated subquery.
For
- * now, just punt.
- */
- if (aggref->aggfilter != NULL)
- return false;
+ {
+ SortGroupClause *orderClause;
+
+ /*
+ * If the order clause is the same column as the one
we're
+ * aggregating, we can still use the index: It is
undefined which
+ * value is MIN() or MAX(), as well as which value is
first or
+ * last when sorted. So, we can still use the index IFF
the
+ * aggregated expression equals the expression used in
the
+ * ordering operation.
+ */
+
+ /*
+ * We only accept a single argument to min/max
aggregates,
+ * orderings that have more clauses won't provide
correct results.
+ */
+ Assert(list_length(aggref->aggorder) == 1);
+
+ orderClause = castNode(SortGroupClause,
linitial(aggref->aggorder));
+
+ Assert(orderClause->tleSortGroupRef ==
curTarget->ressortgroupref);
+
+ if (orderClause->sortop != aggsortop)
+ {
+ List *btclasses;
+ ListCell *cell;
+ bool match = false;
+
+ btclasses =
get_op_btree_interpretation(orderClause->sortop);
+
+ foreach(cell, btclasses)
+ {
+ OpBtreeInterpretation *interpretation;
+ interpretation = (OpBtreeInterpretation
*) lfirst(cell);
+
+ if (op_in_opfamily(aggsortop,
interpretation->opfamily_id))
+ {
+ match = true;
+ break;
+ }
+ }
+
+ if (!match)
+ return false;
+ }
+ }
- aggsortop = fetch_agg_sort_op(aggref->aggfnoid);
- if (!OidIsValid(aggsortop))
- return false; /* not a MIN/MAX aggregate */
+ /* note: we do not care if DISTINCT is mentioned ... */
- curTarget = (TargetEntry *) linitial(aggref->args);
if (contain_mutable_functions((Node *) curTarget->expr))
return false; /* not potentially indexable */
diff --git a/src/test/regress/expected/aggregates.out
b/src/test/regress/expected/aggregates.out
index a5596ab210..94c8cf77f8 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1003,6 +1003,124 @@ select max(unique1) from tenk1 where unique1 > 42;
9999
(1 row)
+-- When sorting on the column that's being aggregated, indexes can also be
+-- used, but only when the aggregate's operator has the same ordering behavior
+-- as the ORDER BY-clause, i.e. if it is in the same btree opclass as the one
+-- chosen for the ORDER BY clause.
+explain (costs off)
+ select min(unique1 ORDER BY unique1 ASC NULLS LAST) from tenk1;
+ QUERY PLAN
+------------------------------------------------------------
+ Result
+ InitPlan 1
+ -> Limit
+ -> Index Only Scan using tenk1_unique1 on tenk1
+ Index Cond: (unique1 IS NOT NULL)
+(5 rows)
+
+select min(unique1 ORDER BY unique1 ASC NULLS LAST) from tenk1;
+ min
+-----
+ 0
+(1 row)
+
+explain (costs off)
+ select max(unique1 ORDER BY unique1 USING <) from tenk1;
+ QUERY PLAN
+---------------------------------------------------------------------
+ Result
+ InitPlan 1
+ -> Limit
+ -> Index Only Scan Backward using tenk1_unique1 on tenk1
+ Index Cond: (unique1 IS NOT NULL)
+(5 rows)
+
+select max(unique1 ORDER BY unique1 USING <) from tenk1;
+ max
+------
+ 9999
+(1 row)
+
+explain (costs off)
+ select max(unique1 ORDER BY tenthous) from tenk1;
+ QUERY PLAN
+-------------------------------
+ Aggregate
+ -> Sort
+ Sort Key: tenthous
+ -> Seq Scan on tenk1
+(4 rows)
+
+select max(unique1 ORDER BY tenthous) from tenk1;
+ max
+------
+ 9999
+(1 row)
+
+CREATE OPERATOR @>@ (function=int4gt, leftarg=int4, rightarg=int4);
+CREATE OPERATOR @<@ (function=int4lt, leftarg=int4, rightarg=int4);
+CREATE OPERATOR @=@ (function=int4eq, leftarg=int4, rightarg=int4);
+CREATE OPERATOR FAMILY my_family USING btree;
+CREATE OPERATOR CLASS my_int_ops
+ FOR TYPE int
+ USING btree FAMILY my_family AS
+ OPERATOR 1 @<@ FOR SEARCH,
+ OPERATOR 5 @>@ FOR SEARCH,
+ OPERATOR 3 @=@,
+ FUNCTION 1 btint4cmp;
+CREATE AGGREGATE my_int_max (
+ BASETYPE = int4,
+ SFUNC = int4larger,
+ STYPE = int4,
+ SORTOP = @>@
+);
+CREATE INDEX idx_int4 ON tenk1 (unique1 my_int_ops);
+EXPLAIN (VERBOSE, COSTS OFF)
+SELECT my_int_max(unique1::int4 ORDER BY unique1::int4 USING @<@ ) from tenk1;
+ QUERY PLAN
+-----------------------------------------------------------------------
+ Result
+ Output: (InitPlan 1).col1
+ InitPlan 1
+ -> Limit
+ Output: tenk1.unique1
+ -> Index Only Scan Backward using idx_int4 on public.tenk1
+ Output: tenk1.unique1
+ Index Cond: (tenk1.unique1 IS NOT NULL)
+(8 rows)
+
+EXPLAIN (VERBOSE, COSTS OFF)
+SELECT my_int_max(unique1::int4 ORDER BY unique1::int4 USING @>@ ) from tenk1;
+ QUERY PLAN
+-----------------------------------------------------------------------
+ Result
+ Output: (InitPlan 1).col1
+ InitPlan 1
+ -> Limit
+ Output: tenk1.unique1
+ -> Index Only Scan Backward using idx_int4 on public.tenk1
+ Output: tenk1.unique1
+ Index Cond: (tenk1.unique1 IS NOT NULL)
+(8 rows)
+
+DROP AGGREGATE my_int_max(int4);
+DROP OPERATOR CLASS my_int_ops USING btree CASCADE;
+NOTICE: drop cascades to index idx_int4
+DROP OPERATOR FAMILY my_family USING btree;
+DROP OPERATOR @>@ (int4, int4);
+DROP OPERATOR @<@ (int4, int4);
+DROP OPERATOR @=@ (int4, int4);
+-- But even then, the index can't be used if we order by multiple columns.
+explain (costs off)
+ select max(unique1 ORDER BY unique1, tenthous) from tenk1;
+ QUERY PLAN
+-------------------------------------
+ Aggregate
+ -> Sort
+ Sort Key: unique1, tenthous
+ -> Seq Scan on tenk1
+(4 rows)
+
-- the planner may choose a generic aggregate here if parallel query is
-- enabled, since that plan will be parallel safe and the "optimized"
-- plan, which has almost identical cost, will not be. we want to test
diff --git a/src/test/regress/sql/aggregates.sql
b/src/test/regress/sql/aggregates.sql
index ca6d1bcfb7..19729ae5f2 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -368,6 +368,57 @@ explain (costs off)
select max(unique1) from tenk1 where unique1 > 42;
select max(unique1) from tenk1 where unique1 > 42;
+-- When sorting on the column that's being aggregated, indexes can also be
+-- used, but only when the aggregate's operator has the same ordering behavior
+-- as the ORDER BY-clause, i.e. if it is in the same btree opclass as the one
+-- chosen for the ORDER BY clause.
+explain (costs off)
+ select min(unique1 ORDER BY unique1 ASC NULLS LAST) from tenk1;
+select min(unique1 ORDER BY unique1 ASC NULLS LAST) from tenk1;
+explain (costs off)
+ select max(unique1 ORDER BY unique1 USING <) from tenk1;
+select max(unique1 ORDER BY unique1 USING <) from tenk1;
+explain (costs off)
+ select max(unique1 ORDER BY tenthous) from tenk1;
+select max(unique1 ORDER BY tenthous) from tenk1;
+
+CREATE OPERATOR @>@ (function=int4gt, leftarg=int4, rightarg=int4);
+CREATE OPERATOR @<@ (function=int4lt, leftarg=int4, rightarg=int4);
+CREATE OPERATOR @=@ (function=int4eq, leftarg=int4, rightarg=int4);
+
+CREATE OPERATOR FAMILY my_family USING btree;
+CREATE OPERATOR CLASS my_int_ops
+ FOR TYPE int
+ USING btree FAMILY my_family AS
+ OPERATOR 1 @<@ FOR SEARCH,
+ OPERATOR 5 @>@ FOR SEARCH,
+ OPERATOR 3 @=@,
+ FUNCTION 1 btint4cmp;
+
+CREATE AGGREGATE my_int_max (
+ BASETYPE = int4,
+ SFUNC = int4larger,
+ STYPE = int4,
+ SORTOP = @>@
+);
+
+CREATE INDEX idx_int4 ON tenk1 (unique1 my_int_ops);
+EXPLAIN (VERBOSE, COSTS OFF)
+SELECT my_int_max(unique1::int4 ORDER BY unique1::int4 USING @<@ ) from tenk1;
+EXPLAIN (VERBOSE, COSTS OFF)
+SELECT my_int_max(unique1::int4 ORDER BY unique1::int4 USING @>@ ) from tenk1;
+
+DROP AGGREGATE my_int_max(int4);
+DROP OPERATOR CLASS my_int_ops USING btree CASCADE;
+DROP OPERATOR FAMILY my_family USING btree;
+DROP OPERATOR @>@ (int4, int4);
+DROP OPERATOR @<@ (int4, int4);
+DROP OPERATOR @=@ (int4, int4);
+
+-- But even then, the index can't be used if we order by multiple columns.
+explain (costs off)
+ select max(unique1 ORDER BY unique1, tenthous) from tenk1;
+
-- the planner may choose a generic aggregate here if parallel query is
-- enabled, since that plan will be parallel safe and the "optimized"
-- plan, which has almost identical cost, will not be. we want to test