Le lundi 6 septembre 2021, 11:25:39 CEST Zhihong Yu a écrit :
> On Mon, Sep 6, 2021 at 1:17 AM Ronan Dunklau <ro...@dunklau.fr> wrote:
> > Le vendredi 3 septembre 2021, 22:54:25 CEST David Zhang a écrit :
> > > The following review has been posted through the commitfest application:
> > > make installcheck-world:  tested, failed
> > > Implements feature:       tested, passed
> > > Spec compliant:           not tested
> > > Documentation:            not tested
> > > 
> > > Applied the v6 patch to master branch and ran regression test for
> > 
> > contrib,
> > 
> > > the result was "All tests successful."
> > 
> > What kind of error did you get running make installcheck-world ? If it
> > passed
> > the make check for contrib, I can't see why it would fail running make
> > installcheck-world.
> > 
> > In any case, I just checked and running make installcheck-world doesn't
> > produce any error.
> > 
> > Since HEAD had moved a bit since the last version, I rebased the patch,
> > resulting in the attached v7.
> > 
> > Best regards,
> > 
> > --
> > Ronan Dunklau
> 
> Hi,
> bq. a pushed-down order by could return wrong results.
> 
> Can you briefly summarize the approach for fixing the bug in the
> description ?

Done, let me know what you think about it.

> 
> + * Returns true if it's safe to push down a sort as described by 'pathkey'
> to
> + * the foreign server
> + */
> +bool
> +is_foreign_pathkey(PlannerInfo *root,
> 
> It would be better to name the method which reflects whether pushdown is
> safe. e.g. is_pathkey_safe_for_pushdown.

The convention used here is the same one as in is_foreign_expr and 
is_foreign_param, which are also related to pushdown-safety. 

-- 
Ronan Dunklau
>From 8d0ebd4df2e5e72a8d490a65001c9971516a4337 Mon Sep 17 00:00:00 2001
From: Ronan Dunklau <ronan.dunk...@aiven.io>
Date: Mon, 6 Sep 2021 09:54:43 +0200
Subject: [PATCH v8] Fix orderby handling in postgres_fdw

The logic for pushing down order bys in postgres fdw didn't take into
account the specific operator used, and as such a pushed-down order by
could return wrong results.

This patch looks up the original operator associated to the pathkey
opfamily, and checks that it actually exists on the foreign side.

If it does, the operator is then used to rebuild an equivalent USING
<operator> clause.
---
 contrib/postgres_fdw/deparse.c                | 156 +++++++++++++-----
 .../postgres_fdw/expected/postgres_fdw.out    |  23 +++
 contrib/postgres_fdw/postgres_fdw.c           |  28 ++--
 contrib/postgres_fdw/postgres_fdw.h           |  11 +-
 contrib/postgres_fdw/sql/postgres_fdw.sql     |   8 +
 src/backend/optimizer/path/equivclass.c       |  26 ++-
 src/include/optimizer/paths.h                 |   2 +
 7 files changed, 187 insertions(+), 67 deletions(-)

diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c
index d98bd66681..fb1b5f9d9b 100644
--- a/contrib/postgres_fdw/deparse.c
+++ b/contrib/postgres_fdw/deparse.c
@@ -37,9 +37,11 @@
 #include "access/sysattr.h"
 #include "access/table.h"
 #include "catalog/pg_aggregate.h"
+#include "catalog/pg_amop.h"
 #include "catalog/pg_collation.h"
 #include "catalog/pg_namespace.h"
 #include "catalog/pg_operator.h"
+#include "catalog/pg_opfamily.h"
 #include "catalog/pg_proc.h"
 #include "catalog/pg_type.h"
 #include "commands/defrem.h"
@@ -47,6 +49,7 @@
 #include "nodes/nodeFuncs.h"
 #include "nodes/plannodes.h"
 #include "optimizer/optimizer.h"
+#include "optimizer/paths.h"
 #include "optimizer/prep.h"
 #include "optimizer/tlist.h"
 #include "parser/parsetree.h"
@@ -182,6 +185,8 @@ static void deparseRangeTblRef(StringInfo buf, PlannerInfo *root,
 							   Index ignore_rel, List **ignore_conds, List **params_list);
 static void deparseAggref(Aggref *node, deparse_expr_cxt *context);
 static void appendGroupByClause(List *tlist, deparse_expr_cxt *context);
+static void appendOrderBySuffix(Oid sortop, Oid sortcoltype, bool nulls_first,
+								deparse_expr_cxt *context);
 static void appendAggOrderBy(List *orderList, List *targetList,
 							 deparse_expr_cxt *context);
 static void appendFunctionName(Oid funcid, deparse_expr_cxt *context);
@@ -1037,6 +1042,41 @@ is_foreign_param(PlannerInfo *root,
 	return false;
 }
 
+/*
+ * Returns true if it's safe to push down a sort as described by 'pathkey' to
+ * the foreign server
+ */
+bool
+is_foreign_pathkey(PlannerInfo *root,
+				   RelOptInfo *baserel,
+				   PathKey *pathkey)
+{
+	EquivalenceClass *pathkey_ec = pathkey->pk_eclass;
+	PgFdwRelationInfo *fpinfo = (PgFdwRelationInfo *) baserel->fdw_private;
+	Expr	   *em_expr;
+
+	/*
+	 * is_foreign_expr would detect volatile expressions as well, but checking
+	 * ec_has_volatile here saves some cycles.
+	 */
+	if (pathkey_ec->ec_has_volatile)
+		return false;
+
+	/* can't push down the sort if the pathkey's opfamily is not shippable */
+	if (!is_shippable(pathkey->pk_opfamily, OperatorFamilyRelationId, fpinfo))
+		return false;
+
+	em_expr = find_em_expr_for_rel(pathkey_ec, baserel);
+	if (em_expr == NULL)
+		return false;
+
+	/*
+	 * Finally, determine if it's safe to evaluate the found expr on the
+	 * foreign server.
+	 */
+	return is_foreign_expr(root, baserel, em_expr);
+}
+
 /*
  * Convert type OID + typmod info into a type name we can ship to the remote
  * server.  Someplace else had better have verified that this type name is
@@ -3331,6 +3371,45 @@ deparseAggref(Aggref *node, deparse_expr_cxt *context)
 	appendStringInfoChar(buf, ')');
 }
 
+/*
+ * Append the ASC, DESC, USING <OPERATOR> and NULLS FIRST / NULLS LAST part
+ * of the ORDER BY clause
+ */
+static void
+appendOrderBySuffix(Oid sortop, Oid sortcoltype, bool nulls_first,
+					deparse_expr_cxt *context)
+{
+	StringInfo	buf = context->buf;
+	TypeCacheEntry *typentry;
+
+	typentry = lookup_type_cache(sortcoltype, TYPECACHE_LT_OPR | TYPECACHE_GT_OPR);
+
+	if (sortop == typentry->lt_opr)
+		appendStringInfoString(buf, " ASC");
+	else if (sortop == typentry->gt_opr)
+		appendStringInfoString(buf, " DESC");
+	else
+	{
+		HeapTuple	opertup;
+		Form_pg_operator operform;
+
+		appendStringInfoString(buf, " USING ");
+
+		/* Append operator name. */
+		opertup = SearchSysCache1(OPEROID, ObjectIdGetDatum(sortop));
+		if (!HeapTupleIsValid(opertup))
+			elog(ERROR, "cache lookup failed for operator %u", sortop);
+		operform = (Form_pg_operator) GETSTRUCT(opertup);
+		deparseOperatorName(buf, operform);
+		ReleaseSysCache(opertup);
+	}
+
+	if (nulls_first)
+		appendStringInfoString(buf, " NULLS FIRST");
+	else
+		appendStringInfoString(buf, " NULLS LAST");
+}
+
 /*
  * Append ORDER BY within aggregate function.
  */
@@ -3346,7 +3425,6 @@ appendAggOrderBy(List *orderList, List *targetList, deparse_expr_cxt *context)
 		SortGroupClause *srt = (SortGroupClause *) lfirst(lc);
 		Node	   *sortexpr;
 		Oid			sortcoltype;
-		TypeCacheEntry *typentry;
 
 		if (!first)
 			appendStringInfoString(buf, ", ");
@@ -3356,32 +3434,8 @@ appendAggOrderBy(List *orderList, List *targetList, deparse_expr_cxt *context)
 										  false, context);
 		sortcoltype = exprType(sortexpr);
 		/* See whether operator is default < or > for datatype */
-		typentry = lookup_type_cache(sortcoltype,
-									 TYPECACHE_LT_OPR | TYPECACHE_GT_OPR);
-		if (srt->sortop == typentry->lt_opr)
-			appendStringInfoString(buf, " ASC");
-		else if (srt->sortop == typentry->gt_opr)
-			appendStringInfoString(buf, " DESC");
-		else
-		{
-			HeapTuple	opertup;
-			Form_pg_operator operform;
-
-			appendStringInfoString(buf, " USING ");
-
-			/* Append operator name. */
-			opertup = SearchSysCache1(OPEROID, ObjectIdGetDatum(srt->sortop));
-			if (!HeapTupleIsValid(opertup))
-				elog(ERROR, "cache lookup failed for operator %u", srt->sortop);
-			operform = (Form_pg_operator) GETSTRUCT(opertup);
-			deparseOperatorName(buf, operform);
-			ReleaseSysCache(opertup);
-		}
-
-		if (srt->nulls_first)
-			appendStringInfoString(buf, " NULLS FIRST");
-		else
-			appendStringInfoString(buf, " NULLS LAST");
+		appendOrderBySuffix(srt->sortop, sortcoltype, srt->nulls_first,
+							context);
 	}
 }
 
@@ -3486,7 +3540,11 @@ appendOrderByClause(List *pathkeys, bool has_final_sort,
 	foreach(lcell, pathkeys)
 	{
 		PathKey    *pathkey = lfirst(lcell);
+		EquivalenceMember *em;
 		Expr	   *em_expr;
+		HeapTuple	tuple;
+		Oid			oprid;
+		bool		isNull;
 
 		if (has_final_sort)
 		{
@@ -3494,26 +3552,44 @@ appendOrderByClause(List *pathkeys, bool has_final_sort,
 			 * By construction, context->foreignrel is the input relation to
 			 * the final sort.
 			 */
-			em_expr = find_em_expr_for_input_target(context->root,
-													pathkey->pk_eclass,
-													context->foreignrel->reltarget);
+			em = find_em_for_input_target(context->root,
+										  pathkey->pk_eclass,
+										  context->foreignrel->reltarget);
 		}
 		else
-			em_expr = find_em_expr_for_rel(pathkey->pk_eclass, baserel);
+			em = find_em_for_rel(pathkey->pk_eclass, baserel);
+
+		em_expr = em->em_expr;
+
+		/*
+		 * Lookup the operator corresponding to the strategy in the opclass.
+		 * The datatype used by the opfamily is not necessarily the same as
+		 * the expression type (for array types for example).
+		 */
+		tuple = SearchSysCache4(AMOPSTRATEGY,
+								ObjectIdGetDatum(pathkey->pk_opfamily),
+								ObjectIdGetDatum(em->em_datatype),
+								ObjectIdGetDatum(em->em_datatype),
+								Int16GetDatum(pathkey->pk_strategy));
+
+		if (!HeapTupleIsValid(tuple))
+			elog(ERROR, "missing operator %d(%u,%u) in opfamily %u",
+				 pathkey->pk_strategy, em->em_datatype, em->em_datatype,
+				 pathkey->pk_opfamily);
 
-		Assert(em_expr != NULL);
+		oprid = DatumGetObjectId(SysCacheGetAttr(AMOPSTRATEGY, tuple,
+												 Anum_pg_amop_amopopr, &isNull));
+		ReleaseSysCache(tuple);
 
 		appendStringInfoString(buf, delim);
 		deparseExpr(em_expr, context);
-		if (pathkey->pk_strategy == BTLessStrategyNumber)
-			appendStringInfoString(buf, " ASC");
-		else
-			appendStringInfoString(buf, " DESC");
 
-		if (pathkey->pk_nulls_first)
-			appendStringInfoString(buf, " NULLS FIRST");
-		else
-			appendStringInfoString(buf, " NULLS LAST");
+		/*
+		 * Here we need to use the expression type to compare against the
+		 * default btree sort operator.
+		 */
+		appendOrderBySuffix(oprid, exprType((Node *) em_expr),
+							pathkey->pk_nulls_first, context);
 
 		delim = ", ";
 	}
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index e3ee30f1aa..1f44c365ef 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -3258,6 +3258,19 @@ select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6
          Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE (("C 1" < 100)) AND ((c2 = 6))
 (6 rows)
 
+-- Ensure that we don't push down an ORDER BY with a non-shippable operator
+explain (verbose, costs off)
+select * from ft2 order by c1 using operator(public.<^);
+                                  QUERY PLAN                                   
+-------------------------------------------------------------------------------
+ Sort
+   Output: c1, c2, c3, c4, c5, c6, c7, c8
+   Sort Key: ft2.c1 USING <^
+   ->  Foreign Scan on public.ft2
+         Output: c1, c2, c3, c4, c5, c6, c7, c8
+         Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1"
+(6 rows)
+
 -- Update local stats on ft2
 ANALYZE ft2;
 -- Add into extension
@@ -3285,6 +3298,16 @@ select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6
  {6,16,26,36,46,56,66,76,86,96}
 (1 row)
 
+-- Ensure that the ORDER BY is pushed to the foreign server
+explain (verbose, costs off)
+select * from ft2 order by c1 using operator(public.<^);
+                                                         QUERY PLAN                                                          
+-----------------------------------------------------------------------------------------------------------------------------
+ Foreign Scan on public.ft2
+   Output: c1, c2, c3, c4, c5, c6, c7, c8
+   Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" ORDER BY "C 1" USING OPERATOR(public.<^) NULLS LAST
+(3 rows)
+
 -- Remove from extension
 alter extension postgres_fdw drop operator class my_op_class using btree;
 alter extension postgres_fdw drop function my_op_cmp(a int, b int);
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index 4bdab30a73..1dd4779185 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -917,8 +917,6 @@ get_useful_pathkeys_for_relation(PlannerInfo *root, RelOptInfo *rel)
 		foreach(lc, root->query_pathkeys)
 		{
 			PathKey    *pathkey = (PathKey *) lfirst(lc);
-			EquivalenceClass *pathkey_ec = pathkey->pk_eclass;
-			Expr	   *em_expr;
 
 			/*
 			 * The planner and executor don't have any clever strategy for
@@ -926,13 +924,8 @@ get_useful_pathkeys_for_relation(PlannerInfo *root, RelOptInfo *rel)
 			 * getting it to be sorted by all of those pathkeys. We'll just
 			 * end up resorting the entire data set.  So, unless we can push
 			 * down all of the query pathkeys, forget it.
-			 *
-			 * is_foreign_expr would detect volatile expressions as well, but
-			 * checking ec_has_volatile here saves some cycles.
 			 */
-			if (pathkey_ec->ec_has_volatile ||
-				!(em_expr = find_em_expr_for_rel(pathkey_ec, rel)) ||
-				!is_foreign_expr(root, rel, em_expr))
+			if (!is_foreign_pathkey(root, rel, pathkey))
 			{
 				query_pathkeys_ok = false;
 				break;
@@ -6540,9 +6533,9 @@ add_foreign_ordered_paths(PlannerInfo *root, RelOptInfo *input_rel,
 			return;
 
 		/* Get the sort expression for the pathkey_ec */
-		sort_expr = find_em_expr_for_input_target(root,
-												  pathkey_ec,
-												  input_rel->reltarget);
+		sort_expr = find_em_for_input_target(root,
+											 pathkey_ec,
+											 input_rel->reltarget)->em_expr;
 
 		/* If it's unsafe to remote, we cannot push down the final sort */
 		if (!is_foreign_expr(root, input_rel, sort_expr))
@@ -7332,13 +7325,12 @@ conversion_error_callback(void *arg)
 }
 
 /*
- * Find an equivalence class member expression to be computed as a sort column
- * in the given target.
+ * Find an equivalence class member to be computed as a sort column in the
+ * given target.
  */
-Expr *
-find_em_expr_for_input_target(PlannerInfo *root,
-							  EquivalenceClass *ec,
-							  PathTarget *target)
+EquivalenceMember *
+find_em_for_input_target(PlannerInfo *root, EquivalenceClass *ec,
+						 PathTarget *target)
 {
 	ListCell   *lc1;
 	int			i;
@@ -7383,7 +7375,7 @@ find_em_expr_for_input_target(PlannerInfo *root,
 				em_expr = ((RelabelType *) em_expr)->arg;
 
 			if (equal(em_expr, expr))
-				return em->em_expr;
+				return em;
 		}
 
 		i++;
diff --git a/contrib/postgres_fdw/postgres_fdw.h b/contrib/postgres_fdw/postgres_fdw.h
index ca83306af9..9930226b6f 100644
--- a/contrib/postgres_fdw/postgres_fdw.h
+++ b/contrib/postgres_fdw/postgres_fdw.h
@@ -171,6 +171,9 @@ extern bool is_foreign_expr(PlannerInfo *root,
 extern bool is_foreign_param(PlannerInfo *root,
 							 RelOptInfo *baserel,
 							 Expr *expr);
+extern bool is_foreign_pathkey(PlannerInfo *root,
+							   RelOptInfo *baserel,
+							   PathKey *pathkey);
 extern void deparseInsertSql(StringInfo buf, RangeTblEntry *rte,
 							 Index rtindex, Relation rel,
 							 List *targetAttrs, bool doNothing,
@@ -213,10 +216,10 @@ extern void deparseTruncateSql(StringInfo buf,
 							   DropBehavior behavior,
 							   bool restart_seqs);
 extern void deparseStringLiteral(StringInfo buf, const char *val);
-extern Expr *find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel);
-extern Expr *find_em_expr_for_input_target(PlannerInfo *root,
-										   EquivalenceClass *ec,
-										   PathTarget *target);
+extern EquivalenceMember *find_em_for_rel(EquivalenceClass *ec, RelOptInfo *rel);
+extern EquivalenceMember *find_em_for_input_target(PlannerInfo *root,
+												   EquivalenceClass *ec,
+												   PathTarget *target);
 extern List *build_tlist_to_deparse(RelOptInfo *foreignrel);
 extern void deparseSelectStmtForRel(StringInfo buf, PlannerInfo *root,
 									RelOptInfo *foreignrel, List *tlist,
diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql
index 30b5175da5..1bbe1212c4 100644
--- a/contrib/postgres_fdw/sql/postgres_fdw.sql
+++ b/contrib/postgres_fdw/sql/postgres_fdw.sql
@@ -902,6 +902,10 @@ create operator class my_op_class for type int using btree family my_op_family a
 explain (verbose, costs off)
 select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6 and c1 < 100 group by c2;
 
+-- Ensure that we don't push down an ORDER BY with a non-shippable operator
+explain (verbose, costs off)
+select * from ft2 order by c1 using operator(public.<^);
+
 -- Update local stats on ft2
 ANALYZE ft2;
 
@@ -919,6 +923,10 @@ explain (verbose, costs off)
 select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6 and c1 < 100 group by c2;
 select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6 and c1 < 100 group by c2;
 
+-- Ensure that the ORDER BY is pushed to the foreign server
+explain (verbose, costs off)
+select * from ft2 order by c1 using operator(public.<^);
+
 -- Remove from extension
 alter extension postgres_fdw drop operator class my_op_class using btree;
 alter extension postgres_fdw drop function my_op_cmp(a int, b int);
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index 6f1abbe47d..b367f18eab 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -932,11 +932,11 @@ is_exprlist_member(Expr *node, List *exprs)
 }
 
 /*
- * Find an equivalence class member expression, all of whose Vars, come from
- * the indicated relation.
+ * Find an equivalence class member, all of whose Vars, come from the
+ * indicated relation.
  */
-Expr *
-find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel)
+EquivalenceMember *
+find_em_for_rel(EquivalenceClass *ec, RelOptInfo *rel)
 {
 	ListCell   *lc_em;
 
@@ -952,7 +952,7 @@ find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel)
 			 * taken entirely from this relation, we'll be content to choose
 			 * any one of those.
 			 */
-			return em->em_expr;
+			return em;
 		}
 	}
 
@@ -960,6 +960,22 @@ find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel)
 	return NULL;
 }
 
+/*
+ * Find an equivalence class member expression, all of whose Vars, come from
+ * the indicated relation.
+ */
+Expr *
+find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel)
+{
+	EquivalenceMember *em = find_em_for_rel(ec, rel);
+
+	if (em != NULL)
+		return em->em_expr;
+
+	return NULL;
+}
+
+
 /*
  * relation_can_be_sorted_early
  *		Can this relation be sorted on this EC before the final output step?
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index f1d111063c..5a2bbc87e1 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -144,6 +144,8 @@ extern EquivalenceMember *find_computable_ec_member(PlannerInfo *root,
 													Relids relids,
 													bool require_parallel_safe);
 extern Expr *find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel);
+extern EquivalenceMember *find_em_for_rel(EquivalenceClass *ec,
+										  RelOptInfo *rel);
 extern bool relation_can_be_sorted_early(PlannerInfo *root, RelOptInfo *rel,
 										 EquivalenceClass *ec,
 										 bool require_parallel_safe);
-- 
2.33.0

Reply via email to