From f2fb47a38003e76aa06b5c6dd2afbeaedef36561 Mon Sep 17 00:00:00 2001
From: Richard Guo <guofenglinux@gmail.com>
Date: Tue, 6 Feb 2024 11:11:21 +0800
Subject: [PATCH v1] review diff

---
 src/backend/optimizer/path/pathkeys.c  | 96 +++++++++-----------------
 src/backend/optimizer/plan/planner.c   | 84 +---------------------
 src/backend/optimizer/prep/prepunion.c | 77 +++++++++++++++++++--
 src/backend/optimizer/util/pathnode.c  | 11 ---
 src/include/optimizer/paths.h          |  2 -
 5 files changed, 104 insertions(+), 166 deletions(-)

diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 2eb144f1df..e0281ef84c 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -985,71 +985,6 @@ build_partition_pathkeys(PlannerInfo *root, RelOptInfo *partrel,
 	return retval;
 }
 
-/*
- * build_setop_pathkeys
- *	  Build and return a list of PathKeys, one for each non-junk target in
- *	  'tlist'.
- */
-List *
-build_setop_pathkeys(PlannerInfo *root, SetOperationStmt *op,
-					 Relids relids, List *tlist)
-{
-	ListCell   *lc;
-	ListCell   *sgccell = list_head(op->groupClauses);
-	List	   *retval = NIL;
-
-	foreach(lc, tlist)
-	{
-		TargetEntry *tle = lfirst_node(TargetEntry, lc);
-		SortGroupClause *sgc;
-
-		Oid			opfamily;
-		Oid			opcintype;
-		int16		strategy;
-		PathKey    *cpathkey;
-
-		if (tle->resjunk)
-			continue;
-
-		/*
-		 * XXX query_is_distinct_for() is happy to Assert this, should this do
-		 * that rather than ERROR?
-		 */
-		if (sgccell == NULL)
-			elog(ERROR, "too few group clauses");
-
-		sgc = lfirst_node(SortGroupClause, sgccell);
-
-		/* Find the operator in pg_amop --- failure shouldn't happen */
-		if (!get_ordering_op_properties(sgc->sortop,
-										&opfamily, &opcintype, &strategy))
-			elog(ERROR, "operator %u is not a valid ordering operator",
-				 sgc->eqop);
-
-		cpathkey = make_pathkey_from_sortinfo(root,
-											  tle->expr,
-											  opfamily,
-											  opcintype,
-											  exprCollation((Node *) tle->expr),
-											  false,
-											  sgc->nulls_first,
-											  0,
-											  relids,
-											  true);
-		retval = lappend(retval, cpathkey);
-		sgccell = lnext(op->groupClauses, sgccell);
-
-		/*
-		 * There's no need to look for redundant pathkeys as set operations
-		 * have no ability to have non-child constants in an EquivalenceClass.
-		 * Let's just make sure that remains true.
-		 */
-		Assert(!EC_MUST_BE_REDUNDANT(cpathkey->pk_eclass));
-	}
-
-	return retval;
-}
-
 /*
  * build_expression_pathkey
  *	  Build a pathkeys list that describes an ordering by a single expression
@@ -2251,6 +2186,34 @@ pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys)
 	return n;
 }
 
+/*
+ * pathkeys_useful_for_setop
+ *		Count the number of pathkeys that are useful for meeting the ordering
+ *		requested by the query's set operation.
+ *
+ * Because we the have the possibility of incremental sort, a prefix list of
+ * keys is potentially useful for improving the performance of the set
+ * operation's ordering. Thus we return 0, if no valuable keys are found, or
+ * the number of leading keys shared by the list and the set operation's
+ * ordering..
+ */
+static int
+pathkeys_useful_for_setop(PlannerInfo *root, List *pathkeys)
+{
+	int			n_common_pathkeys;
+
+	if (root->setop_pathkeys == NIL)
+		return 0;				/* no special setop ordering requested */
+
+	if (pathkeys == NIL)
+		return 0;				/* unordered path */
+
+	(void) pathkeys_count_contained_in(root->setop_pathkeys, pathkeys,
+									   &n_common_pathkeys);
+
+	return n_common_pathkeys;
+}
+
 /*
  * truncate_useless_pathkeys
  *		Shorten the given pathkey list to just the useful pathkeys.
@@ -2268,6 +2231,9 @@ truncate_useless_pathkeys(PlannerInfo *root,
 	if (nuseful2 > nuseful)
 		nuseful = nuseful2;
 	nuseful2 = pathkeys_useful_for_grouping(root, pathkeys);
+	if (nuseful2 > nuseful)
+		nuseful = nuseful2;
+	nuseful2 = pathkeys_useful_for_setop(root, pathkeys);
 	if (nuseful2 > nuseful)
 		nuseful = nuseful2;
 
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 5cb140c049..47006edeae 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -1769,13 +1769,6 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 	foreach(lc, current_rel->pathlist)
 	{
 		Path	   *path = (Path *) lfirst(lc);
-		Path	   *input_path;
-
-		/*
-		 * Keep record of the unmodified path so that we can still tell which
-		 * one is the cheapest_input_path.
-		 */
-		input_path = path;
 
 		/*
 		 * If there is a FOR [KEY] UPDATE/SHARE clause, add the LockRows node.
@@ -1804,82 +1797,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 		}
 
 		/*
-		 * Create paths to suit final sort order required for setop_pathkeys.
-		 * Here we'll sort the cheapest input path (if not sorted already) and
-		 * incremental sort any paths which are partially sorted.  We also
-		 * include the cheapest path as-is so that the set operation can be
-		 * cheaply implemented using a method which does not require the input
-		 * to be sorted.
-		 */
-		if (root->setop_pathkeys != NIL)
-		{
-			Path	   *cheapest_input_path = current_rel->cheapest_total_path;
-			bool		is_sorted;
-			int			presorted_keys;
-
-			is_sorted = pathkeys_count_contained_in(root->setop_pathkeys,
-													path->pathkeys,
-													&presorted_keys);
-
-			if (!is_sorted)
-			{
-				/*
-				 * Try at least sorting the cheapest path and also try
-				 * incrementally sorting any path which is partially sorted
-				 * already (no need to deal with paths which have presorted
-				 * keys when incremental sort is disabled unless it's the
-				 * cheapest input path).
-				 */
-				if (input_path != cheapest_input_path)
-				{
-					if (presorted_keys == 0 || !enable_incremental_sort)
-						continue;
-				}
-				else
-				{
-					/*
-					 * If this is an INSERT/UPDATE/DELETE/MERGE, add a
-					 * ModifyTable path.
-					 */
-					if (parse->commandType != CMD_SELECT)
-						path = build_final_modify_table_path(root,
-															 final_rel,
-															 path);
-
-					/*
-					 * The union planner might want to try a hash-based method
-					 * of executing the set operation, so let's provide the
-					 * cheapest input path so that it can do that as cheaply
-					 * as possible.  If the cheapest_input_path happens to be
-					 * already correctly sorted then we'll add it to final_rel
-					 * at the end of the loop.
-					 */
-					add_path(final_rel, path);
-				}
-
-				/*
-				 * We've no need to consider both a sort and incremental sort.
-				 * We'll just do a sort if there are no presorted keys and an
-				 * incremental sort when there are presorted keys.
-				 */
-				if (presorted_keys == 0 || !enable_incremental_sort)
-					path = (Path *) create_sort_path(root,
-													 final_rel,
-													 path,
-													 root->setop_pathkeys,
-													 limit_tuples);
-				else
-					path = (Path *) create_incremental_sort_path(root,
-																 final_rel,
-																 path,
-																 root->setop_pathkeys,
-																 presorted_keys,
-																 limit_tuples);
-			}
-		}
-
-		/*
-		 * If this is an INSERT/UPDATE/DELETE/MERGE, add a ModifyTable path.
+		 * If this is an INSERT/UPDATE/DELETE/MERGE, add the ModifyTable node.
 		 */
 		if (parse->commandType != CMD_SELECT)
 			path = build_final_modify_table_path(root, final_rel, path);
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c
index dfdf2ec0a9..b0938e0110 100644
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -588,6 +588,74 @@ build_setop_child_paths(PlannerInfo *root, RelOptInfo *rel,
 	{
 		Path	   *subpath = (Path *) lfirst(lc);
 		List	   *pathkeys;
+		Path	   *cheapest_input_path = final_rel->cheapest_total_path;
+		List	   *setop_pathkeys = rel->subroot->setop_pathkeys;
+		bool		is_sorted;
+		int			presorted_keys;
+
+		/*
+		 * Include the cheapest path as-is so that the set operation can be
+		 * cheaply implemented using a method which does not require the input
+		 * to be sorted.
+		 */
+		if (subpath == cheapest_input_path)
+		{
+			/* Convert subpath's pathkeys to outer representation */
+			pathkeys = convert_subquery_pathkeys(root, rel, subpath->pathkeys,
+												 make_tlist_from_pathtarget(subpath->pathtarget));
+
+			/* Generate outer path using this subpath */
+			add_path(rel, (Path *) create_subqueryscan_path(root,
+															rel,
+															subpath,
+															trivial_tlist,
+															pathkeys,
+															NULL));
+		}
+
+		/*
+		 * Create paths to suit final sort order required for setop_pathkeys.
+		 * Here we'll sort the cheapest input path (if not sorted already) and
+		 * incremental sort any paths which are partially sorted.
+		 */
+		is_sorted = pathkeys_count_contained_in(setop_pathkeys,
+												subpath->pathkeys,
+												&presorted_keys);
+
+		if (!is_sorted)
+		{
+			double limittuples = rel->subroot->limit_tuples;
+
+			/*
+			 * Try at least sorting the cheapest path and also try
+			 * incrementally sorting any path which is partially sorted
+			 * already (no need to deal with paths which have presorted
+			 * keys when incremental sort is disabled unless it's the
+			 * cheapest input path).
+			 */
+			if (subpath != cheapest_input_path &&
+				(presorted_keys == 0 || !enable_incremental_sort))
+				continue;
+
+			/*
+			 * We've no need to consider both a sort and incremental sort.
+			 * We'll just do a sort if there are no presorted keys and an
+			 * incremental sort when there are presorted keys.
+			 */
+			if (presorted_keys == 0 || !enable_incremental_sort)
+				subpath = (Path *) create_sort_path(root,
+													final_rel,
+													subpath,
+													setop_pathkeys,
+													limittuples);
+			else
+				subpath = (Path *) create_incremental_sort_path(root,
+																final_rel,
+																subpath,
+																setop_pathkeys,
+																presorted_keys,
+																limittuples);
+		}
 
 		/* Convert subpath's pathkeys to outer representation */
 		pathkeys = convert_subquery_pathkeys(root, rel, subpath->pathkeys,
@@ -690,12 +758,11 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
 
 	if (try_sorted)
 	{
-		/* determine the pathkeys for sorting by the whole target list */
-		union_pathkeys = build_setop_pathkeys(root,
-											  op,
-											  bms_make_singleton(0),
-											  tlist);
+		/* Identify the grouping semantics */
 		groupList = generate_setop_grouplist(op, tlist);
+
+		/* Determine the pathkeys for sorting by the whole target list */
+		union_pathkeys = make_pathkeys_for_sortclauses(root, groupList, tlist);
 	}
 
 	/*
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index e375cd52b5..64a2cd1045 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -587,22 +587,11 @@ add_path(RelOptInfo *parent_rel, Path *new_path)
 			parent_rel->pathlist = foreach_delete_current(parent_rel->pathlist,
 														  p1);
 
-#ifdef NOT_USED
-
-			/*
-			 * XXX fixme.  When creating the final upper rel paths for queries
-			 * which have setop_pathkey set,  we'll at the cheapest path as-is
-			 * also try sorting the cheapest path. If the costs of each are
-			 * fuzzily the same then we might choose to pfree the cheapest
-			 * path.  That's bad as the sort uses that path.
-			 */
-
 			/*
 			 * Delete the data pointed-to by the deleted cell, if possible
 			 */
 			if (!IsA(old_path, IndexPath))
 				pfree(old_path);
-#endif
 		}
 		else
 		{
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 7dc3507e8d..1165e98c5b 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -222,8 +222,6 @@ extern List *build_index_pathkeys(PlannerInfo *root, IndexOptInfo *index,
 								  ScanDirection scandir);
 extern List *build_partition_pathkeys(PlannerInfo *root, RelOptInfo *partrel,
 									  ScanDirection scandir, bool *partialkeys);
-extern List *build_setop_pathkeys(PlannerInfo *root, SetOperationStmt *op,
-								  Relids relids, List *tlist);
 extern List *build_expression_pathkey(PlannerInfo *root, Expr *expr,
 									  Oid opno,
 									  Relids rel, bool create_it);
-- 
2.31.0

