Le mercredi 21 juillet 2021, 11:05:14 CEST Ronan Dunklau a écrit :
> Le mercredi 21 juillet 2021, 04:25:00 CEST David Rowley a écrit :
> > Here the test claims that it wants to ensure that the order by using
> > operator(public.<^) is not pushed down into the foreign scan.
> > However, unless I'm mistaken, it seems there's a completely wrong
> > assumption there that the planner would even attempt that.  In current
> > master we don't add PathKeys for ORDER BY aggregates, why would that
> > sort get pushed down in the first place?
> 
> The whole aggregate, including it's order by clause, can be pushed down so
> there is nothing related to pathkeys here.
> 
> > If I adjust that query to something that would have the planner set
> > pathkeys for, it does push the ORDER BY to the foreign server without
> > any consideration that the sort operator is not shippable to the
> > foreign server.
> > 
> > Am I missing something here, or is postgres_fdw.c's
> > get_useful_pathkeys_for_relation() just broken?
> 
> I think you're right, we need to add a check if the opfamily is shippable.
> I'll submit a patch for that including regression tests.
> 

In fact the support for generating the correct USING clause was inexistent 
too, so that needed a bit more work.

The attached patch does the following:
  - verify the opfamily is shippable to keep pathkeys
  - generate a correct order by clause using the actual operator.

The second part needed a bit of refactoring: the find_em_expr_for_input_target 
and find_em_expr_for_rel need to return the whole EquivalenceMember, because we 
can't know the type used by the opfamily from the expression (example: the 
expression could be of type intarray, but the type used by the opfamily could 
be anyarray).

I also moved the "USING <operator>"' string generation to a separate function 
since it's now used by appendAggOrderBy and appendOrderByClause.

The find_em_expr_for_rel is exposed in optimizer/paths.h, so I kept the 
existing function which returns the expr directly in case it is used out of 
tree. 



-- 
Ronan Dunklau
commit 2376cc6656853987e8f08e9b8f444bf391a9c269
Author: Ronan Dunklau <ronan.dunk...@aiven.io>
Date:   Wed Jul 21 12:44:41 2021 +0200

    Fix postgres_fdw PathKey's handling.
    
    The operator family being used for the sort was completely
    ignored, and as such its existence on the foreign server was not
    checked.

diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c
index 31919fda8c..2c986d3325 100644
--- a/contrib/postgres_fdw/deparse.c
+++ b/contrib/postgres_fdw/deparse.c
@@ -37,9 +37,11 @@
 #include "access/sysattr.h"
 #include "access/table.h"
 #include "catalog/pg_aggregate.h"
+#include "catalog/pg_amop.h"
 #include "catalog/pg_collation.h"
 #include "catalog/pg_namespace.h"
 #include "catalog/pg_operator.h"
+#include "catalog/pg_opfamily.h"
 #include "catalog/pg_proc.h"
 #include "catalog/pg_type.h"
 #include "commands/defrem.h"
@@ -180,6 +182,7 @@ static void deparseRangeTblRef(StringInfo buf, PlannerInfo 
*root,
                                                           Index ignore_rel, 
List **ignore_conds, List **params_list);
 static void deparseAggref(Aggref *node, deparse_expr_cxt *context);
 static void appendGroupByClause(List *tlist, deparse_expr_cxt *context);
+static void appendOrderbyUsingClause(Oid sortop, Oid sortcoltype, 
deparse_expr_cxt *context);
 static void appendAggOrderBy(List *orderList, List *targetList,
                                                         deparse_expr_cxt 
*context);
 static void appendFunctionName(Oid funcid, deparse_expr_cxt *context);
@@ -910,6 +913,26 @@ is_foreign_param(PlannerInfo *root,
        return false;
 }
 
+/* Returns true if the given pathkey can be evaluated on the remote side
+ */
+bool
+is_foreign_pathkey(PlannerInfo *root,
+                                  RelOptInfo *baserel,
+                                  PathKey *pathkey)
+{
+       EquivalenceClass *pathkey_ec = pathkey->pk_eclass;
+       EquivalenceMember *em;
+       PgFdwRelationInfo *fpinfo = (PgFdwRelationInfo *) baserel->fdw_private;
+
+       if (pathkey_ec->ec_has_volatile)
+               return false;
+       return (!pathkey_ec->ec_has_volatile &&
+                       (em = find_em_for_rel(pathkey_ec, baserel)) &&
+                       is_foreign_expr(root, baserel, em->em_expr) &&
+                       is_shippable(pathkey->pk_opfamily, 
OperatorFamilyRelationId, fpinfo));
+}
+
+
 /*
  * Convert type OID + typmod info into a type name we can ship to the remote
  * server.  Someplace else had better have verified that this type name is
@@ -3125,6 +3148,37 @@ deparseAggref(Aggref *node, deparse_expr_cxt *context)
        appendStringInfoChar(buf, ')');
 }
 
+/*
+ * Append the USING <OPERATOR> part of an ORDER BY clause
+ */
+static void
+appendOrderbyUsingClause(Oid sortop, Oid sortcoltype, deparse_expr_cxt 
*context)
+{
+       StringInfo      buf = context->buf;
+       TypeCacheEntry *typentry;
+       typentry = lookup_type_cache(sortcoltype, TYPECACHE_LT_OPR | 
TYPECACHE_GT_OPR);
+
+       if (sortop == typentry->lt_opr)
+               appendStringInfoString(buf, " ASC");
+       else if (sortop == typentry->gt_opr)
+               appendStringInfoString(buf, " DESC");
+       else
+       {
+               HeapTuple       opertup;
+               Form_pg_operator operform;
+
+               appendStringInfoString(buf, " USING ");
+
+               /* Append operator name. */
+               opertup = SearchSysCache1(OPEROID, ObjectIdGetDatum(sortop));
+               if (!HeapTupleIsValid(opertup))
+                       elog(ERROR, "cache lookup failed for operator %u", 
sortop);
+               operform = (Form_pg_operator) GETSTRUCT(opertup);
+               deparseOperatorName(buf, operform);
+               ReleaseSysCache(opertup);
+       }
+}
+
 /*
  * Append ORDER BY within aggregate function.
  */
@@ -3140,7 +3194,6 @@ appendAggOrderBy(List *orderList, List *targetList, 
deparse_expr_cxt *context)
                SortGroupClause *srt = (SortGroupClause *) lfirst(lc);
                Node       *sortexpr;
                Oid                     sortcoltype;
-               TypeCacheEntry *typentry;
 
                if (!first)
                        appendStringInfoString(buf, ", ");
@@ -3150,27 +3203,7 @@ appendAggOrderBy(List *orderList, List *targetList, 
deparse_expr_cxt *context)
                                                                                
  false, context);
                sortcoltype = exprType(sortexpr);
                /* See whether operator is default < or > for datatype */
-               typentry = lookup_type_cache(sortcoltype,
-                                                                        
TYPECACHE_LT_OPR | TYPECACHE_GT_OPR);
-               if (srt->sortop == typentry->lt_opr)
-                       appendStringInfoString(buf, " ASC");
-               else if (srt->sortop == typentry->gt_opr)
-                       appendStringInfoString(buf, " DESC");
-               else
-               {
-                       HeapTuple       opertup;
-                       Form_pg_operator operform;
-
-                       appendStringInfoString(buf, " USING ");
-
-                       /* Append operator name. */
-                       opertup = SearchSysCache1(OPEROID, 
ObjectIdGetDatum(srt->sortop));
-                       if (!HeapTupleIsValid(opertup))
-                               elog(ERROR, "cache lookup failed for operator 
%u", srt->sortop);
-                       operform = (Form_pg_operator) GETSTRUCT(opertup);
-                       deparseOperatorName(buf, operform);
-                       ReleaseSysCache(opertup);
-               }
+               appendOrderbyUsingClause(srt->sortop, sortcoltype, context);
 
                if (srt->nulls_first)
                        appendStringInfoString(buf, " NULLS FIRST");
@@ -3280,7 +3313,11 @@ appendOrderByClause(List *pathkeys, bool has_final_sort,
        foreach(lcell, pathkeys)
        {
                PathKey    *pathkey = lfirst(lcell);
+               EquivalenceMember *em;
                Expr       *em_expr;
+               HeapTuple       tuple;
+               Oid                     oprid;
+               bool            isNull;
 
                if (has_final_sort)
                {
@@ -3288,21 +3325,44 @@ appendOrderByClause(List *pathkeys, bool has_final_sort,
                         * By construction, context->foreignrel is the input 
relation to
                         * the final sort.
                         */
-                       em_expr = find_em_expr_for_input_target(context->root,
-                                                                               
                        pathkey->pk_eclass,
-                                                                               
                        context->foreignrel->reltarget);
+                       em = find_em_for_input_target(context->root,
+                                                                               
  pathkey->pk_eclass,
+                                                                               
  context->foreignrel->reltarget);
                }
                else
-                       em_expr = find_em_expr_for_rel(pathkey->pk_eclass, 
baserel);
+                       em = find_em_for_rel(pathkey->pk_eclass, baserel);
+               em_expr = em->em_expr;
+
+               /*
+                * Lookup the operator corresponding to the strategy in the 
opclass.
+                * The datatype used by the opfamily is not necessarily the 
same as
+                * the expression type (for array types for example).
+                */
+               tuple = SearchSysCache4(AMOPSTRATEGY,
+                                                               
ObjectIdGetDatum(pathkey->pk_opfamily),
+                                                               em->em_datatype,
+                                                               em->em_datatype,
+                                                               
pathkey->pk_strategy);
+               if (!HeapTupleIsValid(tuple))
+               {
+                       elog(ERROR, "cache lookup failed for operator family 
%u", pathkey->pk_opfamily);
+               }
+
+               oprid = DatumGetObjectId(SysCacheGetAttr(AMOPSTRATEGY, tuple,
+                                                                               
                 Anum_pg_amop_amopopr, &isNull));
+               ReleaseSysCache(tuple);
+
 
                Assert(em_expr != NULL);
 
                appendStringInfoString(buf, delim);
                deparseExpr(em_expr, context);
-               if (pathkey->pk_strategy == BTLessStrategyNumber)
-                       appendStringInfoString(buf, " ASC");
-               else
-                       appendStringInfoString(buf, " DESC");
+
+               /*
+                * Here we need to use the expression type to compare against 
the
+                * default btree sort operator
+                */
+               appendOrderbyUsingClause(oprid, exprType((Node *) em_expr), 
context);
 
                if (pathkey->pk_nulls_first)
                        appendStringInfoString(buf, " NULLS FIRST");
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out 
b/contrib/postgres_fdw/expected/postgres_fdw.out
index ed25e7a743..66bada908f 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -3168,6 +3168,18 @@ select array_agg(c1 order by c1 using 
operator(public.<^)) from ft2 where c2 = 6
          Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE (("C 1" < 100)) 
AND ((c2 = 6))
 (6 rows)
 
+-- This will not be pushed either
+explain verbose select * from ft2 order by c1 using operator(public.<^);
+                                  QUERY PLAN                                   
+-------------------------------------------------------------------------------
+ Sort  (cost=190.83..193.33 rows=1000 width=142)
+   Output: c1, c2, c3, c4, c5, c6, c7, c8
+   Sort Key: ft2.c1 USING <^
+   ->  Foreign Scan on public.ft2  (cost=100.00..141.00 rows=1000 width=47)
+         Output: c1, c2, c3, c4, c5, c6, c7, c8
+         Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1"
+(6 rows)
+
 -- Update local stats on ft2
 ANALYZE ft2;
 -- Add into extension
@@ -3195,6 +3207,15 @@ select array_agg(c1 order by c1 using 
operator(public.<^)) from ft2 where c2 = 6
  {6,16,26,36,46,56,66,76,86,96}
 (1 row)
 
+-- And this will be pushed too
+explain verbose select * from ft2 order by c1 using operator(public.<^);
+                                                         QUERY PLAN            
                                              
+-----------------------------------------------------------------------------------------------------------------------------
+ Foreign Scan on public.ft2  (cost=170.83..193.33 rows=1000 width=47)
+   Output: c1, c2, c3, c4, c5, c6, c7, c8
+   Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" ORDER 
BY "C 1" USING OPERATOR(public.<^) NULLS LAST
+(3 rows)
+
 -- Remove from extension
 alter extension postgres_fdw drop operator class my_op_class using btree;
 alter extension postgres_fdw drop function my_op_cmp(a int, b int);
diff --git a/contrib/postgres_fdw/postgres_fdw.c 
b/contrib/postgres_fdw/postgres_fdw.c
index f15c97ad7a..9922ee614c 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -916,8 +916,6 @@ get_useful_pathkeys_for_relation(PlannerInfo *root, 
RelOptInfo *rel)
                foreach(lc, root->query_pathkeys)
                {
                        PathKey    *pathkey = (PathKey *) lfirst(lc);
-                       EquivalenceClass *pathkey_ec = pathkey->pk_eclass;
-                       Expr       *em_expr;
 
                        /*
                         * The planner and executor don't have any clever 
strategy for
@@ -929,9 +927,7 @@ get_useful_pathkeys_for_relation(PlannerInfo *root, 
RelOptInfo *rel)
                         * is_foreign_expr would detect volatile expressions as 
well, but
                         * checking ec_has_volatile here saves some cycles.
                         */
-                       if (pathkey_ec->ec_has_volatile ||
-                               !(em_expr = find_em_expr_for_rel(pathkey_ec, 
rel)) ||
-                               !is_foreign_expr(root, rel, em_expr))
+                       if (!is_foreign_pathkey(root, rel, pathkey))
                        {
                                query_pathkeys_ok = false;
                                break;
@@ -6510,9 +6506,9 @@ add_foreign_ordered_paths(PlannerInfo *root, RelOptInfo 
*input_rel,
                        return;
 
                /* Get the sort expression for the pathkey_ec */
-               sort_expr = find_em_expr_for_input_target(root,
+               sort_expr = find_em_for_input_target(root,
                                                                                
                  pathkey_ec,
-                                                                               
                  input_rel->reltarget);
+                                                                               
                  input_rel->reltarget)->em_expr;
 
                /* If it's unsafe to remote, we cannot push down the final sort 
*/
                if (!is_foreign_expr(root, input_rel, sort_expr))
@@ -7250,11 +7246,11 @@ conversion_error_callback(void *arg)
 }
 
 /*
- * Find an equivalence class member expression to be computed as a sort column
+ * Find an equivalence class member to be computed as a sort column
  * in the given target.
  */
-Expr *
-find_em_expr_for_input_target(PlannerInfo *root,
+EquivalenceMember*
+find_em_for_input_target(PlannerInfo *root,
                                                          EquivalenceClass *ec,
                                                          PathTarget *target)
 {
@@ -7301,7 +7297,7 @@ find_em_expr_for_input_target(PlannerInfo *root,
                                em_expr = ((RelabelType *) em_expr)->arg;
 
                        if (equal(em_expr, expr))
-                               return em->em_expr;
+                               return em;
                }
 
                i++;
diff --git a/contrib/postgres_fdw/postgres_fdw.h 
b/contrib/postgres_fdw/postgres_fdw.h
index 9591c0f6c2..a0d5c859da 100644
--- a/contrib/postgres_fdw/postgres_fdw.h
+++ b/contrib/postgres_fdw/postgres_fdw.h
@@ -165,12 +165,17 @@ extern void classifyConditions(PlannerInfo *root,
                                                           List *input_conds,
                                                           List **remote_conds,
                                                           List **local_conds);
+
 extern bool is_foreign_expr(PlannerInfo *root,
                                                        RelOptInfo *baserel,
                                                        Expr *expr);
 extern bool is_foreign_param(PlannerInfo *root,
                                                         RelOptInfo *baserel,
                                                         Expr *expr);
+extern bool is_foreign_pathkey(PlannerInfo *root,
+                                                          RelOptInfo *baserel,
+                                                          PathKey *pathkey);
+
 extern void deparseInsertSql(StringInfo buf, RangeTblEntry *rte,
                                                         Index rtindex, 
Relation rel,
                                                         List *targetAttrs, 
bool doNothing,
@@ -212,8 +217,8 @@ extern void deparseTruncateSql(StringInfo buf,
                                                           DropBehavior 
behavior,
                                                           bool restart_seqs);
 extern void deparseStringLiteral(StringInfo buf, const char *val);
-extern Expr *find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel);
-extern Expr *find_em_expr_for_input_target(PlannerInfo *root,
+extern EquivalenceMember *find_em_for_rel(EquivalenceClass *ec, RelOptInfo 
*rel);
+extern EquivalenceMember *find_em_for_input_target(PlannerInfo *root,
                                                                                
   EquivalenceClass *ec,
                                                                                
   PathTarget *target);
 extern List *build_tlist_to_deparse(RelOptInfo *foreignrel);
diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql 
b/contrib/postgres_fdw/sql/postgres_fdw.sql
index 02a6b15a13..841fac84e4 100644
--- a/contrib/postgres_fdw/sql/postgres_fdw.sql
+++ b/contrib/postgres_fdw/sql/postgres_fdw.sql
@@ -873,6 +873,9 @@ create operator class my_op_class for type int using btree 
family my_op_family a
 explain (verbose, costs off)
 select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 
6 and c1 < 100 group by c2;
 
+-- This will not be pushed either
+explain verbose select * from ft2 order by c1 using operator(public.<^);
+
 -- Update local stats on ft2
 ANALYZE ft2;
 
@@ -890,6 +893,9 @@ explain (verbose, costs off)
 select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 
6 and c1 < 100 group by c2;
 select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 
6 and c1 < 100 group by c2;
 
+-- And this will be pushed too
+explain verbose select * from ft2 order by c1 using operator(public.<^);
+
 -- Remove from extension
 alter extension postgres_fdw drop operator class my_op_class using btree;
 alter extension postgres_fdw drop function my_op_cmp(a int, b int);
diff --git a/src/backend/optimizer/path/equivclass.c 
b/src/backend/optimizer/path/equivclass.c
index 6f1abbe47d..d2eac89528 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -935,8 +935,8 @@ is_exprlist_member(Expr *node, List *exprs)
  * Find an equivalence class member expression, all of whose Vars, come from
  * the indicated relation.
  */
-Expr *
-find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel)
+EquivalenceMember*
+find_em_for_rel(EquivalenceClass *ec, RelOptInfo *rel)
 {
        ListCell   *lc_em;
 
@@ -952,7 +952,7 @@ find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel)
                         * taken entirely from this relation, we'll be content 
to choose
                         * any one of those.
                         */
-                       return em->em_expr;
+                       return em;
                }
        }
 
@@ -960,6 +960,14 @@ find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel)
        return NULL;
 }
 
+Expr *
+find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel)
+{
+       EquivalenceMember *em = find_em_for_rel(ec, rel);
+       return em ? em->em_expr : NULL;
+}
+
+
 /*
  * relation_can_be_sorted_early
  *             Can this relation be sorted on this EC before the final output 
step?
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index f1d111063c..bc17114ba3 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -144,6 +144,7 @@ extern EquivalenceMember 
*find_computable_ec_member(PlannerInfo *root,
                                                                                
                        Relids relids,
                                                                                
                        bool require_parallel_safe);
 extern Expr *find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel);
+extern EquivalenceMember *find_em_for_rel(EquivalenceClass *ec, RelOptInfo 
*rel);
 extern bool relation_can_be_sorted_early(PlannerInfo *root, RelOptInfo *rel,
                                                                                
 EquivalenceClass *ec,
                                                                                
 bool require_parallel_safe);

Reply via email to