On 07/01/23 21:57, Ankit Kumar Pandey wrote:
On 07/01/23 09:58, David Rowley wrote:
>
> The attached patch has no tests added. It's going to need some of
> those.
While writing test cases, I found that optimization do not happen for
case #1
(which is prime candidate for such operation) like
EXPLAIN (COSTS OFF)
SELECT empno,
depname,
min(salary) OVER (PARTITION BY depname ORDER BY empno) depminsalary,
sum(salary) OVER (PARTITION BY depname) depsalary
FROM empsalary
ORDER BY depname, empno, enroll_date
This happens because mutual exclusiveness of two operands (when number
of window functions > 1) viz
is_sorted and last activeWindow in the condition:
( !is_sorted && lnext(activeWindows, l) == NULL)
For 2nd last window function, is_sorted is false and path keys get added.
In next run (for last window function), is_sorted becomes true and whole
optimization
part is skipped.
Note: Major issue that if I remove is_sorted from condition, even though
path keys are added, it still do not perform optimization and works same
as in master/unoptimized case.
Perhaps adding path keys at last window function is not doing trick?
Maybe we need to add pathkeys
to all window functions which are subset of query's order by
irrespective of being last or not?
Case #2:
For presorted columns, eg
CREATE INDEX depname_idx ON empsalary(depname);
SET enable_seqscan=0;
EXPLAIN (COSTS OFF)
SELECT empno,
min(salary) OVER (PARTITION BY depname) depminsalary
FROM empsalary
ORDER BY depname, empno;
Is this correct plan:
a)
QUERY PLAN
-------------------------------------------------------
Incremental Sort
Sort Key: depname, empno
Presorted Key: depname
-> WindowAgg
-> Index Scan using depname_idx on empsalary
(5 rows)
or this:
b) (Akin to Optimized version)
QUERY PLAN
-------------------------------------------------------
WindowAgg
-> Incremental Sort
Sort Key: depname, empno
Presorted Key: depname
-> Index Scan using depname_idx on empsalary
(5 rows)
Patched version does (a) because of is_sorted condition.
If we remove both is_sorted and lnext(activeWindows, l) == NULL conditions,
we get correct results in these two cases.
Attached patch with test cases.
For case #2, test cases still uses (a) as expected output which I don't
think is right
and we should revisit. Other than that, only failing case is due to
issue mentioned in case #1.
Thanks
--
Regards,
Ankit Kumar Pandey
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 000d757bdd..781916f219 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -4423,6 +4423,7 @@ create_one_window_path(PlannerInfo *root,
PathTarget *window_target;
ListCell *l;
List *topqual = NIL;
+ bool has_runcondition = false;
/*
* Since each window clause could require a different sort order, we stack
@@ -4457,6 +4458,42 @@ create_one_window_path(PlannerInfo *root,
path->pathkeys,
&presorted_keys);
+ has_runcondition |= (wc->runCondition != NIL);
+
+ /*
+ * If not sorted already and the query has an ORDER BY clause, then we
+ * try to push the entire ORDER BY below the final WindowAgg. We
+ * don't do this when the query has a DISTINCT clause as the planner
+ * might opt to do a Hash Aggregate and spoil our efforts here. We
+ * also can't do this when the ORDER BY contains any WindowFuncs as we
+ * obviously can't sort something that's yet to be evaluated. We also
+ * don't bother with this when there is a WindowClauses with a
+ * runCondition as those can reduce the number of rows to sort in the
+ * ORDER BY and we don't want add complexity to the WindowAgg sort if
+ * the final ORDER BY sort is going to have far fewer rows to sort.
+ * For the same reason, don't bother if the query has a LIMIT clause.
+ */
+ if (!is_sorted && lnext(activeWindows, l) == NULL &&
+ root->parse->distinctClause == NIL &&
+ root->parse->sortClause != NIL &&
+ !has_runcondition && root->parse->limitCount == NULL &&
+ !contain_window_function((Node *) get_sortgrouplist_exprs(root->parse->sortClause,
+ root->processed_tlist)))
+ {
+ List *orderby_pathkeys;
+
+ orderby_pathkeys = make_pathkeys_for_sortclauses(root,
+ root->parse->sortClause,
+ root->processed_tlist);
+
+ if (pathkeys_contained_in(window_pathkeys, orderby_pathkeys))
+ window_pathkeys = orderby_pathkeys;
+
+ is_sorted = pathkeys_count_contained_in(window_pathkeys,
+ path->pathkeys,
+ &presorted_keys);
+ }
+
/* Sort if necessary */
if (!is_sorted)
{
diff --git a/src/test/regress/expected/window.out b/src/test/regress/expected/window.out
index 90e89fb5b6..2b8b5c4533 100644
--- a/src/test/regress/expected/window.out
+++ b/src/test/regress/expected/window.out
@@ -3982,7 +3982,158 @@ ORDER BY depname, empno;
-> Seq Scan on empsalary
(11 rows)
+-- Do not perform additional sort if column is presorted
+CREATE INDEX depname_idx ON empsalary(depname);
+SET enable_seqscan=0;
+EXPLAIN (COSTS OFF)
+SELECT empno,
+ min(salary) OVER (PARTITION BY depname) depminsalary
+FROM empsalary
+ORDER BY depname, empno;
+ QUERY PLAN
+-------------------------------------------------------
+ Incremental Sort
+ Sort Key: depname, empno
+ Presorted Key: depname
+ -> WindowAgg
+ -> Index Scan using depname_idx on empsalary
+(5 rows)
+
+DROP INDEX depname_idx;
+RESET enable_seqscan;
RESET enable_hashagg;
+-- If window function's ORDER BY is subset of QUERY's ORDER BY, sort as per as
+-- QUERY's ORDER BY to save additional sorting in the end
+EXPLAIN (COSTS OFF)
+SELECT empno,
+ depname,
+ min(salary) OVER (ORDER BY depname) depminsalary
+FROM empsalary
+ORDER BY depname, empno;
+ QUERY PLAN
+-----------------------------------
+ WindowAgg
+ -> Sort
+ Sort Key: depname, empno
+ -> Seq Scan on empsalary
+(4 rows)
+
+-- Same as above, but PARTITION BY clause is included as well
+EXPLAIN (COSTS OFF)
+SELECT empno,
+ depname,
+ min(salary) OVER (PARTITION BY depname ORDER BY empno) depminsalary
+FROM empsalary
+ORDER BY depname, empno, enroll_date;
+ QUERY PLAN
+-----------------------------------------------
+ WindowAgg
+ -> Sort
+ Sort Key: depname, empno, enroll_date
+ -> Seq Scan on empsalary
+(4 rows)
+
+-- Negation of above, when sort optimization is not needed to be performed
+EXPLAIN (COSTS OFF)
+SELECT empno,
+ depname,
+ min(salary) OVER (PARTITION BY depname ORDER BY empno) depminsalary
+FROM empsalary
+ORDER BY empno, enroll_date;
+ QUERY PLAN
+-----------------------------------------
+ Sort
+ Sort Key: empno, enroll_date
+ -> WindowAgg
+ -> Sort
+ Sort Key: depname, empno
+ -> Seq Scan on empsalary
+(6 rows)
+
+-- ORDER BY's in multiple Window functions can be combined into one
+-- if they are subset of QUERY's ORDER BY
+EXPLAIN (COSTS OFF)
+SELECT empno,
+ depname,
+ min(salary) OVER (PARTITION BY depname ORDER BY empno) depminsalary,
+ sum(salary) OVER (PARTITION BY depname) depsalary,
+ count(*) OVER (ORDER BY enroll_date DESC) c
+FROM empsalary
+ORDER BY depname, empno, enroll_date;
+ QUERY PLAN
+------------------------------------------------------
+ WindowAgg
+ -> WindowAgg
+ -> Sort
+ Sort Key: depname, empno, enroll_date
+ -> WindowAgg
+ -> Sort
+ Sort Key: enroll_date DESC
+ -> Seq Scan on empsalary
+(8 rows)
+
+-- SORT OPTIMIZATION (as in cases above) is not needed to be performed
+-- in cases, as mentioned below.
+-- Case #1: When distinct clause is included
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT empno,
+ depname,
+ min(salary) OVER (PARTITION BY depname) depminsalary,
+ sum(salary) OVER (PARTITION BY depname) depsalary,
+ count(*) OVER (ORDER BY enroll_date DESC) c
+FROM empsalary
+ORDER BY depname, empno;
+ QUERY PLAN
+-------------------------------------------------------------------------------------------------------
+ Unique
+ -> Sort
+ Sort Key: depname, empno, (min(salary) OVER (?)), (sum(salary) OVER (?)), (count(*) OVER (?))
+ -> WindowAgg
+ -> Sort
+ Sort Key: depname
+ -> WindowAgg
+ -> Sort
+ Sort Key: enroll_date DESC
+ -> Seq Scan on empsalary
+(10 rows)
+
+-- Case #2: When Limit clause is specified
+EXPLAIN (COSTS OFF)
+SELECT empno,
+ depname,
+ min(salary) OVER (PARTITION BY empno) depminsalary,
+ sum(salary) OVER (PARTITION BY empno) depsalary
+FROM empsalary
+ORDER BY empno, depname LIMIT 1;
+ QUERY PLAN
+-----------------------------------------------
+ Limit
+ -> Incremental Sort
+ Sort Key: empno, depname
+ Presorted Key: empno
+ -> WindowAgg
+ -> Sort
+ Sort Key: empno
+ -> Seq Scan on empsalary
+(8 rows)
+
+-- Case #3: When ORDER BY contains any WindowFuncs
+EXPLAIN (COSTS OFF)
+SELECT empno,
+ row_number() over (PARTITION BY empno) esalary
+FROM empsalary
+ORDER BY empno, esalary;
+ QUERY PLAN
+--------------------------------------------
+ Incremental Sort
+ Sort Key: empno, (row_number() OVER (?))
+ Presorted Key: empno
+ -> WindowAgg
+ -> Sort
+ Sort Key: empno
+ -> Seq Scan on empsalary
+(7 rows)
+
-- Test Sort node reordering
EXPLAIN (COSTS OFF)
SELECT
diff --git a/src/test/regress/sql/window.sql b/src/test/regress/sql/window.sql
index b7bd0a83da..1f4264ce03 100644
--- a/src/test/regress/sql/window.sql
+++ b/src/test/regress/sql/window.sql
@@ -1324,8 +1324,84 @@ SELECT DISTINCT
FROM empsalary
ORDER BY depname, empno;
+-- Do not perform additional sort if column is presorted
+CREATE INDEX depname_idx ON empsalary(depname);
+SET enable_seqscan=0;
+
+EXPLAIN (COSTS OFF)
+SELECT empno,
+ min(salary) OVER (PARTITION BY depname) depminsalary
+FROM empsalary
+ORDER BY depname, empno;
+DROP INDEX depname_idx;
+RESET enable_seqscan;
RESET enable_hashagg;
+-- If window function's ORDER BY is subset of QUERY's ORDER BY, sort as per as
+-- QUERY's ORDER BY to save additional sorting in the end
+EXPLAIN (COSTS OFF)
+SELECT empno,
+ depname,
+ min(salary) OVER (ORDER BY depname) depminsalary
+FROM empsalary
+ORDER BY depname, empno;
+
+-- Same as above, but PARTITION BY clause is included as well
+EXPLAIN (COSTS OFF)
+SELECT empno,
+ depname,
+ min(salary) OVER (PARTITION BY depname ORDER BY empno) depminsalary
+FROM empsalary
+ORDER BY depname, empno, enroll_date;
+
+-- Negation of above, when sort optimization is not needed to be performed
+EXPLAIN (COSTS OFF)
+SELECT empno,
+ depname,
+ min(salary) OVER (PARTITION BY depname ORDER BY empno) depminsalary
+FROM empsalary
+ORDER BY empno, enroll_date;
+
+-- ORDER BY's in multiple Window functions can be combined into one
+-- if they are subset of QUERY's ORDER BY
+EXPLAIN (COSTS OFF)
+SELECT empno,
+ depname,
+ min(salary) OVER (PARTITION BY depname ORDER BY empno) depminsalary,
+ sum(salary) OVER (PARTITION BY depname) depsalary,
+ count(*) OVER (ORDER BY enroll_date DESC) c
+FROM empsalary
+ORDER BY depname, empno, enroll_date;
+
+-- SORT OPTIMIZATION (as in cases above) is not needed to be performed
+-- in cases, as mentioned below.
+
+-- Case #1: When distinct clause is included
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT empno,
+ depname,
+ min(salary) OVER (PARTITION BY depname) depminsalary,
+ sum(salary) OVER (PARTITION BY depname) depsalary,
+ count(*) OVER (ORDER BY enroll_date DESC) c
+FROM empsalary
+ORDER BY depname, empno;
+
+-- Case #2: When Limit clause is specified
+EXPLAIN (COSTS OFF)
+SELECT empno,
+ depname,
+ min(salary) OVER (PARTITION BY empno) depminsalary,
+ sum(salary) OVER (PARTITION BY empno) depsalary
+FROM empsalary
+ORDER BY empno, depname LIMIT 1;
+
+-- Case #3: When ORDER BY contains any WindowFuncs
+EXPLAIN (COSTS OFF)
+SELECT empno,
+ row_number() over (PARTITION BY empno) esalary
+FROM empsalary
+ORDER BY empno, esalary;
+
-- Test Sort node reordering
EXPLAIN (COSTS OFF)
SELECT
--
2.37.2