Re: Todo: Teach planner to evaluate multiple windows in the optimal order

Ankit Kumar Pandey Sun, 15 Jan 2023 09:53:13 -0800

On 13/01/23 07:48, David Rowley wrote:

It would just care if the
pathkeys were present and return a list of pathkeys not contained so
that an incremental sort could be done only on the returned list and a

Unique on an empty returned list.


In create_final_distinct_paths, presorted keys are determined from

input_rel->pathlist & needed_pathkeys. Problem with input_rel->pathlist

is that, for index node, useful_pathkeys is stored ininput_rel->pathlist but this useful_pathkeys

is determined from truncate_useless_pathkeys(index_pathkeys) whichremoves index_keys if ordering is different.

Hence, input_rel->pathlist returns null for select distinct b,a from abwhere a < 10; even if index is created on a.

In order to tackle this, I have added index_pathkeys in indexpath nodeitself.


Although I started this patch from master, I merged changes to window sort

optimizations.


In patched version:

set enable_hashagg=0;
set enable_seqscan=0;

explain select distinct relname,relkind,count(*) over (partition by
relkind) from pg_Class;
                                              QUERY PLAN
-------------------------------------------------------------------------------------------------------
 Unique  (cost=10000000039.49..10000000063.73 rows=415 width=73)
   ->  Incremental Sort  (cost=10000000039.49..10000000060.62 rows=415 width=73)
         Sort Key: relkind, relname, (count(*) OVER (?))
         Presorted Key: relkind
         ->  WindowAgg  (cost=10000000034.20..10000000041.46 rows=415 width=73)
               ->  Sort  (cost=10000000034.20..10000000035.23 rows=415 width=65)
                     Sort Key: relkind
                     ->  Seq Scan on pg_class  
(cost=10000000000.00..10000000016.15 rows=415 width=65)
(8 rows)

explain select distinct b,a from ab where a < 10;
                                    QUERY PLAN
----------------------------------------------------------------------------------
 Unique  (cost=0.71..60.05 rows=611 width=8)
   ->  Incremental Sort  (cost=0.71..55.55 rows=900 width=8)
         Sort Key: a, b
         Presorted Key: a
         ->  Index Scan using ab_a_idx on ab  (cost=0.29..28.04 rows=900 
width=8)
               Index Cond: (a < 10)
(6 rows)

explain select distinct b,a, count(*) over (partition by a) from abcd order by 
a,b;
                                             QUERY PLAN
-----------------------------------------------------------------------------------------------------
 Unique  (cost=10000021174.63..10000038095.75 rows=60 width=16)
   ->  Incremental Sort  (cost=10000021174.63..10000036745.75 rows=180000 
width=16)
         Sort Key: a, b, (count(*) OVER (?))
         Presorted Key: a, b
         ->  WindowAgg  (cost=10000020948.87..10000024098.87 rows=180000 
width=16)
               ->  Sort  (cost=10000020948.87..10000021398.87 rows=180000 
width=8)
                     Sort Key: a, b
                     ->  Seq Scan on abcd  (cost=10000000000.00..10000002773.00 
rows=180000 width=8)
(8 rows)

explain select distinct a, b, count(*) over (partition by a,b,c) from abcd;
                                              QUERY PLAN
------------------------------------------------------------------------------------------------------
 Unique  (cost=10000021580.47..10000036629.31 rows=60 width=20)
   ->  Incremental Sort  (cost=10000021580.47..10000035279.31 rows=180000 
width=20)
         Sort Key: a, b, c, (count(*) OVER (?))
         Presorted Key: a, b, c
         ->  WindowAgg  (cost=10000021561.37..10000025611.37 rows=180000 
width=20)
               ->  Sort  (cost=10000021561.37..10000022011.37 rows=180000 
width=12)
                     Sort Key: a, b, c
                     ->  Seq Scan on abcd  (cost=10000000000.00..10000002773.00 
rows=180000 width=12)
(8 rows)

explain select distinct a, b, count(*) over (partition by b,a, c) from abcd;

                                            QUERY PLAN

---------------------------------------------------------------------------------------------------

 Unique  (cost=2041.88..36764.90 rows=60 width=20)

   ->  Incremental Sort  (cost=2041.88..35414.90 rows=180000 width=20)

         Sort Key: b, a, c, (count(*) OVER (?))

         Presorted Key: b, a, c

         ->  WindowAgg  (cost=1989.94..25746.96 rows=180000 width=20)

               ->  Incremental Sort  (cost=1989.94..22146.96 rows=180000 
width=12)

                     Sort Key: b, a, c

                     Presorted Key: b

                     ->  Index Scan using b_idx on abcd  (cost=0.29..7174.62 
rows=180000 width=12)

(9 rows)


In master:

explain select distinct relname,relkind,count(*) over (partition by
relkind) from pg_Class;

                                              QUERY PLAN
-------------------------------------------------------------------------------------------------------
 Unique  (cost=10000000059.50..10000000063.65 rows=415 width=73)
   ->  Sort  (cost=10000000059.50..10000000060.54 rows=415 width=73)
         Sort Key: relname, relkind, (count(*) OVER (?))
         ->  WindowAgg  (cost=10000000034.20..10000000041.46 rows=415 width=73)
               ->  Sort  (cost=10000000034.20..10000000035.23 rows=415 width=65)
                     Sort Key: relkind
                     ->  Seq Scan on pg_class  
(cost=10000000000.00..10000000016.15 rows=415 width=65)
(7 rows)

explain select distinct b,a from ab where a < 10;

                                    QUERY PLAN
----------------------------------------------------------------------------------
 Unique  (cost=72.20..78.95 rows=611 width=8)
   ->  Sort  (cost=72.20..74.45 rows=900 width=8)
         Sort Key: b, a
         ->  Index Scan using ab_a_idx on ab  (cost=0.29..28.04 rows=900 
width=8)
               Index Cond: (a < 10)
(5 rows)

explain select distinct b,a, count(*) over (partition by a) from abcd order by 
a,b;

                                             QUERY PLAN

-----------------------------------------------------------------------------------------------------

 Unique  (cost=10000023704.77..10000041084.40 rows=60 width=16)

   ->  Incremental Sort  (cost=10000023704.77..10000039734.40 rows=180000 
width=16)

         Sort Key: a, b, (count(*) OVER (?))

         Presorted Key: a

         ->  WindowAgg  (cost=10000020948.87..10000024098.87 rows=180000 
width=16)

               ->  Sort  (cost=10000020948.87..10000021398.87 rows=180000 
width=8)

                     Sort Key: a

                     ->  Seq Scan on abcd  (cost=10000000000.00..10000002773.00 
rows=180000 width=8)

(8 rows)
explain select distinct a, b, count(*) over (partition by b,a, c) from abcd;
                                            QUERY PLAN
---------------------------------------------------------------------------------------------------
 Unique  (cost=45151.33..46951.33 rows=60 width=20)
   ->  Sort  (cost=45151.33..45601.33 rows=180000 width=20)
         Sort Key: a, b, (count(*) OVER (?))
         ->  WindowAgg  (cost=1989.94..25746.96 rows=180000 width=20)
               ->  Incremental Sort  (cost=1989.94..22146.96 rows=180000 
width=12)
                     Sort Key: b, a, c
                     Presorted Key: b
                     ->  Index Scan using b_idx on abcd  (cost=0.29..7174.62 
rows=180000 width=12)
(8 rows)


Note: Composite keys are also handled.

create index xy_idx on xyz(x,y);

explain select distinct x,z,y from xyz;
                                    QUERY PLAN
----------------------------------------------------------------------------------
 Unique  (cost=0.86..55.97 rows=60 width=12)
   ->  Incremental Sort  (cost=0.86..51.47 rows=600 width=12)
         Sort Key: x, y, z
         Presorted Key: x, y
         ->  Index Scan using xy_idx on xyz  (cost=0.15..32.80 rows=600 
width=12)
(5 rows)


There are some cases where different kind of scan happens

explain select distinct x,y from xyz where y < 10;
                                    QUERY PLAN
-----------------------------------------------------------------------------------
 Unique  (cost=47.59..51.64 rows=60 width=8)
   ->  Sort  (cost=47.59..48.94 rows=540 width=8)
         Sort Key: x, y
         ->  Bitmap Heap Scan on xyz  (cost=12.34..23.09 rows=540 width=8)
               Recheck Cond: (y < 10)

-> Bitmap Index Scan on y_idx (cost=0.00..12.20rows=540 width=0)

                     Index Cond: (y < 10)
(7 rows)

As code only checks from IndexPath (at the moment), other scan paths arenot covered.

Is it okay to cover these in same way as I did for IndexPath? (with nolimitation on this behaviour on certain path types?)

Also, I am assuming distinct pathkeys can be changed without any issues.As changes are limited to modification in distinct path only,


I don't see this affecting other nodes. Test cases are green,

with a couple of failures in window functions (one which I had added)and one very weird:


EXPLAIN (COSTS OFF)
SELECT DISTINCT
       empno,
       enroll_date,
       depname,
       sum(salary) OVER (PARTITION BY depname order by empno) depsalary,
       min(salary) OVER (PARTITION BY depname order by enroll_date) depminsalary
FROM empsalary
ORDER BY depname, empno;
                                             QUERY PLAN
-----------------------------------------------------------------------------------------------------
 Incremental Sort
   Sort Key: depname, empno
   Presorted Key: depname
   ->  Unique
         ->  Incremental Sort
               Sort Key: depname, enroll_date, empno, (sum(salary) OVER (?)), 
(min(salary) OVER (?))
               Presorted Key: depname
               ->  WindowAgg
                     ->  Incremental Sort
                           Sort Key: depname, empno
                           Presorted Key: depname
                           ->  WindowAgg
                                 ->  Sort
                                       Sort Key: depname, enroll_date
                                       ->  Seq Scan on empsalary
(15 rows)

In above query plan, unique used to come after Incremental sort in themaster.



Pending:

1. Consider if Pathkey.pk_strategy and pk_nulls_first need to becompared too, this is pending


as I have to look these up and understand them.

2. Test cases (failures and new cases)

3. Improve comments


Patch v8 attached.

Please let me know any review comments, will address these in followup patch

with pending items.


Thanks,

Ankit

diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c
index b9da588114..e13c8f1914 100644
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -1015,7 +1015,6 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel,
 								  orderbyclauses,
 								  orderbyclausecols,
 								  useful_pathkeys,
-								  index_pathkeys,
 								  index_is_ordered ?
 								  ForwardScanDirection :
 								  NoMovementScanDirection,
@@ -1038,7 +1037,6 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel,
 									  orderbyclauses,
 									  orderbyclausecols,
 									  useful_pathkeys,
-									  index_pathkeys,
 									  index_is_ordered ?
 									  ForwardScanDirection :
 									  NoMovementScanDirection,
@@ -1074,7 +1072,6 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel,
 									  NIL,
 									  NIL,
 									  useful_pathkeys,
-									  index_pathkeys,
 									  BackwardScanDirection,
 									  index_only_scan,
 									  outer_relids,
@@ -1092,7 +1089,6 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel,
 										  NIL,
 										  NIL,
 										  useful_pathkeys,
-										  index_pathkeys,
 										  BackwardScanDirection,
 										  index_only_scan,
 										  outer_relids,
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 28be495370..609df93dc9 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -1968,21 +1968,3 @@ has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel)
 		return true;			/* might be able to use them for ordering */
 	return false;				/* definitely useless */
 }
-
-/*
- * Returns True if key1 subset is key2 i.e.
- * atleast one key from keys1 is present on key2
- */
-bool
-is_pathkey_subset(List	*keys1, List* keys2)
-{
-	ListCell* l;
-	foreach(l, keys1)
-	{
-		PathKey    *pathkey1 = (PathKey *) lfirst(l);
-		if(pathkey_is_redundant(pathkey1, keys2)){
-			return true;
-		}
-	}
-	return false;
-}
\ No newline at end of file
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index ee883048e3..044fb24666 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -4421,14 +4421,8 @@ create_one_window_path(PlannerInfo *root,
 					   List *activeWindows)
 {
 	PathTarget *window_target;
-	Query	   *parse = root->parse;
 	ListCell   *l;
 	List	   *topqual = NIL;
-	List	   *window_pathkeys;
-	List	   *orderby_pathkeys = NIL;
-	int			sort_pushdown_idx = -1;
-	int			presorted_keys;
-	bool		is_sorted;
 
 	/*
 	 * Since each window clause could require a different sort order, we stack
@@ -4447,99 +4441,17 @@ create_one_window_path(PlannerInfo *root,
 	 */
 	window_target = input_target;
 
-	/*
-	 * Here we attempt to minimize the number of sorts which must be performed
-	 * for queries with an ORDER BY clause.
-	 *
-	 * It's possible due to select_active_windows() putting the WindowClauses
-	 * with the lowest tleSortGroupRef last in the activeWindows list that the
-	 * final WindowClause has a subset of the sort requirements that the
-	 * query's ORDER BY clause has.  Below we try to detect this case and if
-	 * we find this is true, we consider adjusting the sort that's done for
-	 * WindowAggs and include the additional clauses so that no additional
-	 * sorting is required for the query's ORDER BY clause.
-	 *
-	 * We don't try this optimization for the following cases:
-	 *
-	 * 1.	If the query has a DISTINCT clause.  We needn't waste any
-	 *		additional effort for the more strict sort order as if DISTINCT
-	 *		is done via Hash Aggregate then that's going to undo this work.
-	 *
-	 * 2.	If the query has a LIMIT clause.  The top-level sort will be
-	 *		able to use a top-n sort which might be more efficient than
-	 *		sorting	by the additional columns.  If the LIMIT does not reduce
-	 *		the	number of rows significantly then this might not be true, but
-	 *		we don't try to consider that here.
-	 *
-	 * 3.	If the top-level WindowClause has a runCondition then this can
-	 *		filter out tuples and make the final sort cheaper.  If we pushed
-	 *		the sort down below the WindowAgg then we'd need to sort all rows
-	 *		including ones that the runCondition might filter.  This may waste
-	 *		effort so we just don't try to push down the sort for this case.
-	 *
-	 * 4.	If the ORDER BY contains any expressions containing WindowFuncs
-	 *		then we can't push down the sort as these obviously must be
-	 *		evaluated before they can be sorted.
-	 */
-	if (parse->sortClause != NIL &&
-		parse->limitCount == NULL &&
-		linitial_node(WindowClause, activeWindows)->runCondition == NIL &&
-		!contain_window_function((Node *) get_sortgrouplist_exprs(parse->sortClause,
-								 root->processed_tlist)))
-	{
-		orderby_pathkeys = make_pathkeys_for_sortclauses(root,
-														 parse->sortClause,
-														 root->processed_tlist);
-
-		/*
-		 * Loop backwards over the WindowClauses looking for the first
-		 * WindowClause which has pathkeys not contained in the
-		 * orderby_pathkeys.  What we're looking for here is the lowest level
-		 * that we push the additional sort requirements down into.  If we
-		 * fail here then sort_pushdown_idx will remain at -1.
-		 */
-		foreach_reverse(l, activeWindows)
-		{
-			WindowClause *wc = lfirst_node(WindowClause, l);
-
-			window_pathkeys = make_pathkeys_for_window(root,
-														wc,
-														root->processed_tlist);
-
-			if (pathkeys_contained_in(window_pathkeys, orderby_pathkeys))
-				sort_pushdown_idx = foreach_current_index(l);
-			else
-				break;
-		}
-	}
-
 	foreach(l, activeWindows)
 	{
 		WindowClause *wc = lfirst_node(WindowClause, l);
+		List	   *window_pathkeys;
+		int			presorted_keys;
+		bool		is_sorted;
 		bool		topwindow;
 
 		window_pathkeys = make_pathkeys_for_window(root,
-													wc,
-													root->processed_tlist);
-
-		/*
-		 * When we get to the sort_pushdown_idx WindowClause, if the path is
-		 * not already correctly sorted, then just use the pathkeys for the
-		 * query's ORDER BY clause instead of the WindowClause's pathkeys.
-		 * When the path is already correctly sorted there's no point in
-		 * adjusting the pathkeys as this just moves the sort down without
-		 * actually reducing the number of sorts which are required in the
-		 * plan overall.  sort_pushdown_idx will be -1 and never match when
-		 * this optimization was disabled above.
-		 */
-		if (foreach_current_index(l) == sort_pushdown_idx &&
-			!pathkeys_contained_in(window_pathkeys, path->pathkeys))
-		{
-			/* check to make sure sort_pushdown_idx was set correctly */
-			Assert(pathkeys_contained_in(window_pathkeys, orderby_pathkeys));
-
-			window_pathkeys = orderby_pathkeys;
-		}
+												   wc,
+												   root->processed_tlist);
 
 		is_sorted = pathkeys_count_contained_in(window_pathkeys,
 												path->pathkeys,
@@ -4933,43 +4845,9 @@ create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
 			bool		is_sorted;
 			int			presorted_keys;
 
-
-			/*
-			 * Consider any any sorting which can be reutilized for distinct
-			 * even if ordering differs.
-			 */
-			if (IsA(lfirst(lc), IndexPath))
-			{
-				/*
-				 * Find if index pathkeys are subset of needed pathkeys
-				 * if so then prepend these keys so that sorting is not needed
-				 */
-				IndexPath *idx_path  = (IndexPath*) lfirst(lc);
-				if (is_pathkey_subset(needed_pathkeys, idx_path->indexpathkeys))
-				{
-
-					needed_pathkeys = list_concat_unique(list_copy(idx_path->indexpathkeys), needed_pathkeys);
-				}
-				is_sorted = pathkeys_count_contained_in(needed_pathkeys,
-													idx_path->indexpathkeys,
-													&presorted_keys);
-			}
-			else
-			{
-				/*
-				 * If needed pathkeys are subset of query_pathkeys,
-				 * if so, lower nodes will have sorted some columns which
-				 * we can reuse.
-				 */
-				if (is_pathkey_subset(needed_pathkeys, root->query_pathkeys))
-				{
-					needed_pathkeys = list_concat_unique(list_copy(root->query_pathkeys), needed_pathkeys);
-				}
-				is_sorted = pathkeys_count_contained_in(needed_pathkeys,
+			is_sorted = pathkeys_count_contained_in(needed_pathkeys,
 													input_path->pathkeys,
 													&presorted_keys);
-			}
-
 
 			if (is_sorted)
 				sorted_path = input_path;
@@ -6626,7 +6504,7 @@ plan_cluster_use_sort(Oid tableOid, Oid indexOid)
 
 	/* Estimate the cost of index scan */
 	indexScanPath = create_index_path(root, indexInfo,
-									  NIL, NIL, NIL, NIL, NIL,
+									  NIL, NIL, NIL, NIL,
 									  ForwardScanDirection, false,
 									  NULL, 1.0, false);
 
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index c666be4f4f..4478036bb6 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -1000,7 +1000,6 @@ create_index_path(PlannerInfo *root,
 				  List *indexorderbys,
 				  List *indexorderbycols,
 				  List *pathkeys,
-				  List *indexpathkeys,
 				  ScanDirection indexscandir,
 				  bool indexonly,
 				  Relids required_outer,
@@ -1024,9 +1023,7 @@ create_index_path(PlannerInfo *root,
 	pathnode->indexclauses = indexclauses;
 	pathnode->indexorderbys = indexorderbys;
 	pathnode->indexorderbycols = indexorderbycols;
-
 	pathnode->indexscandir = indexscandir;
-	pathnode->indexpathkeys = indexpathkeys;
 
 	cost_index(pathnode, root, loop_count, partial_path);
 
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index a2abb40356..c20b7298a3 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -1601,7 +1601,6 @@ typedef struct IndexPath
 	List	   *indexclauses;
 	List	   *indexorderbys;
 	List	   *indexorderbycols;
-	List	   *indexpathkeys;
 	ScanDirection indexscandir;
 	Cost		indextotalcost;
 	Selectivity indexselectivity;
diff --git a/src/include/nodes/pg_list.h b/src/include/nodes/pg_list.h
index 0de9d73dac..529a382d28 100644
--- a/src/include/nodes/pg_list.h
+++ b/src/include/nodes/pg_list.h
@@ -378,30 +378,14 @@ lnext(const List *l, const ListCell *c)
 		 (cell = NULL, false); \
 		 cell##__state.i++)
 
-/*
- * foreach_reverse -
- *	  a convenience macro for looping through a list in reverse
- *
- * This is equivalent to foreach, only it loops over the list starting with
- * the last element and ends on the first element.
- */
-#define foreach_reverse(cell, lst)	\
-	for (ForEachState cell##__state = {(lst), cell##__state.l->length - 1}; \
-		 (cell##__state.l != NIL && \
-		  cell##__state.i >= 0) ? \
-		 (cell = &cell##__state.l->elements[cell##__state.i], true) : \
-		 (cell = NULL, false); \
-		 cell##__state.i--)
-
 /*
  * foreach_delete_current -
  *	  delete the current list element from the List associated with a
- *	  surrounding foreach() or foreach_reverse() loop, returning the new List
-*	  pointer.
+ *	  surrounding foreach() loop, returning the new List pointer.
  *
  * This is equivalent to list_delete_cell(), but it also adjusts the foreach
- * or foreach_reverse loop's state so that no list elements will be missed.
- * Do not delete elements from an active foreach loop's list in any other way!
+ * loop's state so that no list elements will be missed.  Do not delete
+ * elements from an active foreach loop's list in any other way!
  */
 #define foreach_delete_current(lst, cell)	\
 	(cell##__state.i--, \
@@ -409,9 +393,8 @@ lnext(const List *l, const ListCell *c)
 
 /*
  * foreach_current_index -
- *	  get the zero-based index within the list of the current element in the
- *	  surrounding foreach() or foreach_reverse() loop; pass the name of the
- *	  "ListCell *" iterator variable.
+ *	  get the zero-based list index of a surrounding foreach() loop's
+ *	  current element; pass the name of the "ListCell *" iterator variable.
  *
  * Beware of using this after foreach_delete_current(); the value will be
  * out of sync for the rest of the current loop iteration.  Anyway, since
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h
index 1833218efc..02305ef902 100644
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -44,7 +44,6 @@ extern IndexPath *create_index_path(PlannerInfo *root,
 									List *indexorderbys,
 									List *indexorderbycols,
 									List *pathkeys,
-									List *indexpathkeys,
 									ScanDirection indexscandir,
 									bool indexonly,
 									Relids required_outer,
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 1204e9623f..65a3c35611 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -248,7 +248,6 @@ extern List *truncate_useless_pathkeys(PlannerInfo *root,
 									   RelOptInfo *rel,
 									   List *pathkeys);
 extern bool has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel);
-extern bool is_pathkey_subset(List	*keys1, List* keys2);
 extern List *append_pathkeys(List *target, List *source);
 extern PathKey *make_canonical_pathkey(PlannerInfo *root,
 									   EquivalenceClass *eclass, Oid opfamily,
diff --git a/src/test/regress/expected/window.out b/src/test/regress/expected/window.out
index ab94f810fc..b2c6605e60 100644
--- a/src/test/regress/expected/window.out
+++ b/src/test/regress/expected/window.out
@@ -3984,210 +3984,7 @@ ORDER BY depname, empno;
                                  ->  Seq Scan on empsalary
 (12 rows)
 
--- Do not perform sort pushdown if column is presorted
-CREATE INDEX depname_idx ON empsalary(depname);
-SET enable_seqscan=0;
-EXPLAIN (COSTS OFF)
-SELECT empno,
-        min(salary) OVER (PARTITION BY depname) depminsalary
-FROM empsalary
-ORDER BY depname, empno;
-                      QUERY PLAN                       
--------------------------------------------------------
- Incremental Sort
-   Sort Key: depname, empno
-   Presorted Key: depname
-   ->  WindowAgg
-         ->  Index Scan using depname_idx on empsalary
-(5 rows)
-
--- Do perform sort pushdown if columns are only partially sorted
-EXPLAIN (COSTS OFF)
-SELECT empno,
-        min(salary) OVER (PARTITION BY depname, empno) depminsalary
-FROM empsalary
-ORDER BY depname, empno, enroll_date;
-                      QUERY PLAN                       
--------------------------------------------------------
- WindowAgg
-   ->  Incremental Sort
-         Sort Key: depname, empno, enroll_date
-         Presorted Key: depname
-         ->  Index Scan using depname_idx on empsalary
-(5 rows)
-
-DROP INDEX depname_idx;
-RESET enable_seqscan;
 RESET enable_hashagg;
--- If window function's ORDER BY is subset of QUERY's ORDER BY, sort as per as
--- QUERY's ORDER BY to save additional sorting in the end
-EXPLAIN (COSTS OFF)
-SELECT empno,
-       depname,
-       min(salary) OVER (ORDER BY depname) depminsalary
-FROM empsalary
-ORDER BY depname, empno;
-            QUERY PLAN             
------------------------------------
- WindowAgg
-   ->  Sort
-         Sort Key: depname, empno
-         ->  Seq Scan on empsalary
-(4 rows)
-
--- Same as above, but PARTITION BY clause is included as well
-EXPLAIN (COSTS OFF)
-SELECT empno,
-       depname,
-       min(salary) OVER (PARTITION BY depname ORDER BY empno) depminsalary
-FROM empsalary
-ORDER BY depname, empno, enroll_date;
-                  QUERY PLAN                   
------------------------------------------------
- WindowAgg
-   ->  Sort
-         Sort Key: depname, empno, enroll_date
-         ->  Seq Scan on empsalary
-(4 rows)
-
--- Negation of above, when sort optimization is not needed to be performed
-EXPLAIN (COSTS OFF)
-SELECT empno,
-       depname,
-       min(salary) OVER (PARTITION BY depname ORDER BY empno) depminsalary
-FROM empsalary
-ORDER BY empno, enroll_date;
-               QUERY PLAN                
------------------------------------------
- Sort
-   Sort Key: empno, enroll_date
-   ->  WindowAgg
-         ->  Sort
-               Sort Key: depname, empno
-               ->  Seq Scan on empsalary
-(6 rows)
-
--- ORDER BY's in multiple Window functions can be combined into one
--- if they are subset of QUERY's ORDER BY
-EXPLAIN (COSTS OFF)
-SELECT empno,
-       depname,
-       min(salary) OVER (PARTITION BY depname ORDER BY empno) depminsalary,
-       sum(salary) OVER (PARTITION BY depname) depsalary,
-       count(*) OVER (ORDER BY enroll_date DESC) c
-FROM empsalary
-ORDER BY depname, empno, enroll_date;
-                      QUERY PLAN                      
-------------------------------------------------------
- WindowAgg
-   ->  WindowAgg
-         ->  Sort
-               Sort Key: depname, empno, enroll_date
-               ->  WindowAgg
-                     ->  Sort
-                           Sort Key: enroll_date DESC
-                           ->  Seq Scan on empsalary
-(8 rows)
-
--- order of window function shouldn't affect query's ORDER BY pushdown
-EXPLAIN (COSTS OFF)
-SELECT empno,
-       depname,
-       count(*) OVER (ORDER BY enroll_date DESC) c,
-       sum(salary) OVER (PARTITION BY depname) depsalary,
-       min(salary) OVER (PARTITION BY depname ORDER BY empno) depminsalary
-FROM empsalary
-ORDER BY depname, empno, enroll_date;
-                      QUERY PLAN                      
-------------------------------------------------------
- WindowAgg
-   ->  WindowAgg
-         ->  Sort
-               Sort Key: depname, empno, enroll_date
-               ->  WindowAgg
-                     ->  Sort
-                           Sort Key: enroll_date DESC
-                           ->  Seq Scan on empsalary
-(8 rows)
-
--- SORT OPTIMIZATION (as in cases above) is not needed to be performed
--- in cases, as mentioned below.
--- Case #1: When distinct clause is included
-EXPLAIN (COSTS OFF)
-SELECT DISTINCT empno,
-       depname,
-       min(salary) OVER (PARTITION BY depname) depminsalary,
-       sum(salary) OVER (PARTITION BY depname) depsalary,
-       count(*) OVER (ORDER BY enroll_date DESC) c
-FROM empsalary
-ORDER BY depname, empno;
-                                              QUERY PLAN                                               
--------------------------------------------------------------------------------------------------------
- Unique
-   ->  Sort
-         Sort Key: depname, empno, (min(salary) OVER (?)), (sum(salary) OVER (?)), (count(*) OVER (?))
-         ->  WindowAgg
-               ->  Sort
-                     Sort Key: depname
-                     ->  WindowAgg
-                           ->  Sort
-                                 Sort Key: enroll_date DESC
-                                 ->  Seq Scan on empsalary
-(10 rows)
-
--- Case #2: When Limit clause is specified
-EXPLAIN (COSTS OFF)
-SELECT empno,
-       depname,
-       min(salary) OVER (PARTITION BY empno) depminsalary,
-       sum(salary) OVER (PARTITION BY empno) depsalary
-FROM empsalary
-ORDER BY empno, depname LIMIT 1;
-                  QUERY PLAN                   
------------------------------------------------
- Limit
-   ->  Incremental Sort
-         Sort Key: empno, depname
-         Presorted Key: empno
-         ->  WindowAgg
-               ->  Sort
-                     Sort Key: empno
-                     ->  Seq Scan on empsalary
-(8 rows)
-
--- Case #3: When ORDER BY contains any WindowFuncs
-EXPLAIN (COSTS OFF)
-SELECT empno,
-       row_number() over (PARTITION BY empno) esalary
-FROM empsalary
-ORDER BY empno, esalary;
-                 QUERY PLAN                 
---------------------------------------------
- Incremental Sort
-   Sort Key: empno, (row_number() OVER (?))
-   Presorted Key: empno
-   ->  WindowAgg
-         ->  Sort
-               Sort Key: empno
-               ->  Seq Scan on empsalary
-(7 rows)
-
--- Case #4: When WindowFunc ORDER BY is superset of QUERY's ORDER BY
-EXPLAIN (COSTS OFF)
-SELECT empno,
-       sum(salary) OVER (PARTITION BY depname) depsalary,
-       min(salary) OVER (PARTITION BY depname ORDER BY empno) depminsalary
-FROM empsalary
-ORDER BY depname;
-               QUERY PLAN                
------------------------------------------
- WindowAgg
-   ->  WindowAgg
-         ->  Sort
-               Sort Key: depname, empno
-               ->  Seq Scan on empsalary
-(5 rows)
-
 -- Test Sort node reordering
 EXPLAIN (COSTS OFF)
 SELECT
diff --git a/src/test/regress/sql/window.sql b/src/test/regress/sql/window.sql
index 665e156920..b7bd0a83da 100644
--- a/src/test/regress/sql/window.sql
+++ b/src/test/regress/sql/window.sql
@@ -1324,110 +1324,8 @@ SELECT DISTINCT
 FROM empsalary
 ORDER BY depname, empno;
 
--- Do not perform sort pushdown if column is presorted
-CREATE INDEX depname_idx ON empsalary(depname);
-SET enable_seqscan=0;
-
-EXPLAIN (COSTS OFF)
-SELECT empno,
-        min(salary) OVER (PARTITION BY depname) depminsalary
-FROM empsalary
-ORDER BY depname, empno;
-
--- Do perform sort pushdown if columns are only partially sorted
-EXPLAIN (COSTS OFF)
-SELECT empno,
-        min(salary) OVER (PARTITION BY depname, empno) depminsalary
-FROM empsalary
-ORDER BY depname, empno, enroll_date;
-
-DROP INDEX depname_idx;
-RESET enable_seqscan;
 RESET enable_hashagg;
 
--- If window function's ORDER BY is subset of QUERY's ORDER BY, sort as per as
--- QUERY's ORDER BY to save additional sorting in the end
-EXPLAIN (COSTS OFF)
-SELECT empno,
-       depname,
-       min(salary) OVER (ORDER BY depname) depminsalary
-FROM empsalary
-ORDER BY depname, empno;
-
--- Same as above, but PARTITION BY clause is included as well
-EXPLAIN (COSTS OFF)
-SELECT empno,
-       depname,
-       min(salary) OVER (PARTITION BY depname ORDER BY empno) depminsalary
-FROM empsalary
-ORDER BY depname, empno, enroll_date;
-
--- Negation of above, when sort optimization is not needed to be performed
-EXPLAIN (COSTS OFF)
-SELECT empno,
-       depname,
-       min(salary) OVER (PARTITION BY depname ORDER BY empno) depminsalary
-FROM empsalary
-ORDER BY empno, enroll_date;
-
--- ORDER BY's in multiple Window functions can be combined into one
--- if they are subset of QUERY's ORDER BY
-EXPLAIN (COSTS OFF)
-SELECT empno,
-       depname,
-       min(salary) OVER (PARTITION BY depname ORDER BY empno) depminsalary,
-       sum(salary) OVER (PARTITION BY depname) depsalary,
-       count(*) OVER (ORDER BY enroll_date DESC) c
-FROM empsalary
-ORDER BY depname, empno, enroll_date;
-
--- order of window function shouldn't affect query's ORDER BY pushdown
-EXPLAIN (COSTS OFF)
-SELECT empno,
-       depname,
-       count(*) OVER (ORDER BY enroll_date DESC) c,
-       sum(salary) OVER (PARTITION BY depname) depsalary,
-       min(salary) OVER (PARTITION BY depname ORDER BY empno) depminsalary
-FROM empsalary
-ORDER BY depname, empno, enroll_date;
-
--- SORT OPTIMIZATION (as in cases above) is not needed to be performed
--- in cases, as mentioned below.
-
--- Case #1: When distinct clause is included
-EXPLAIN (COSTS OFF)
-SELECT DISTINCT empno,
-       depname,
-       min(salary) OVER (PARTITION BY depname) depminsalary,
-       sum(salary) OVER (PARTITION BY depname) depsalary,
-       count(*) OVER (ORDER BY enroll_date DESC) c
-FROM empsalary
-ORDER BY depname, empno;
-
--- Case #2: When Limit clause is specified
-EXPLAIN (COSTS OFF)
-SELECT empno,
-       depname,
-       min(salary) OVER (PARTITION BY empno) depminsalary,
-       sum(salary) OVER (PARTITION BY empno) depsalary
-FROM empsalary
-ORDER BY empno, depname LIMIT 1;
-
--- Case #3: When ORDER BY contains any WindowFuncs
-EXPLAIN (COSTS OFF)
-SELECT empno,
-       row_number() over (PARTITION BY empno) esalary
-FROM empsalary
-ORDER BY empno, esalary;
-
--- Case #4: When WindowFunc ORDER BY is superset of QUERY's ORDER BY
-EXPLAIN (COSTS OFF)
-SELECT empno,
-       sum(salary) OVER (PARTITION BY depname) depsalary,
-       min(salary) OVER (PARTITION BY depname ORDER BY empno) depminsalary
-FROM empsalary
-ORDER BY depname;
-
 -- Test Sort node reordering
 EXPLAIN (COSTS OFF)
 SELECT

Re: Todo: Teach planner to evaluate multiple windows in the optimal order

Reply via email to