Le vendredi 3 septembre 2021, 22:54:25 CEST David Zhang a écrit : > The following review has been posted through the commitfest application: > make installcheck-world: tested, failed > Implements feature: tested, passed > Spec compliant: not tested > Documentation: not tested > > Applied the v6 patch to master branch and ran regression test for contrib, > the result was "All tests successful."
What kind of error did you get running make installcheck-world ? If it passed the make check for contrib, I can't see why it would fail running make installcheck-world. In any case, I just checked and running make installcheck-world doesn't produce any error. Since HEAD had moved a bit since the last version, I rebased the patch, resulting in the attached v7. Best regards, -- Ronan Dunklau
>From ccfb0a3d7dae42f56c699aac6f699c3c2f08c812 Mon Sep 17 00:00:00 2001 From: Ronan Dunklau <ronan.dunk...@aiven.io> Date: Mon, 6 Sep 2021 09:54:43 +0200 Subject: [PATCH v7] Fix orderby handling in postgres_fdw The logic for pushing down order bys in postgres fdw didn't take into account the specific operator used, and as such a pushed-down order by could return wrong results. --- contrib/postgres_fdw/deparse.c | 156 +++++++++++++----- .../postgres_fdw/expected/postgres_fdw.out | 23 +++ contrib/postgres_fdw/postgres_fdw.c | 28 ++-- contrib/postgres_fdw/postgres_fdw.h | 11 +- contrib/postgres_fdw/sql/postgres_fdw.sql | 8 + src/backend/optimizer/path/equivclass.c | 26 ++- src/include/optimizer/paths.h | 2 + 7 files changed, 187 insertions(+), 67 deletions(-) diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c index d98bd66681..fb1b5f9d9b 100644 --- a/contrib/postgres_fdw/deparse.c +++ b/contrib/postgres_fdw/deparse.c @@ -37,9 +37,11 @@ #include "access/sysattr.h" #include "access/table.h" #include "catalog/pg_aggregate.h" +#include "catalog/pg_amop.h" #include "catalog/pg_collation.h" #include "catalog/pg_namespace.h" #include "catalog/pg_operator.h" +#include "catalog/pg_opfamily.h" #include "catalog/pg_proc.h" #include "catalog/pg_type.h" #include "commands/defrem.h" @@ -47,6 +49,7 @@ #include "nodes/nodeFuncs.h" #include "nodes/plannodes.h" #include "optimizer/optimizer.h" +#include "optimizer/paths.h" #include "optimizer/prep.h" #include "optimizer/tlist.h" #include "parser/parsetree.h" @@ -182,6 +185,8 @@ static void deparseRangeTblRef(StringInfo buf, PlannerInfo *root, Index ignore_rel, List **ignore_conds, List **params_list); static void deparseAggref(Aggref *node, deparse_expr_cxt *context); static void appendGroupByClause(List *tlist, deparse_expr_cxt *context); +static void appendOrderBySuffix(Oid sortop, Oid sortcoltype, bool nulls_first, + deparse_expr_cxt *context); static void appendAggOrderBy(List *orderList, List *targetList, deparse_expr_cxt *context); static void appendFunctionName(Oid funcid, deparse_expr_cxt *context); @@ -1037,6 +1042,41 @@ is_foreign_param(PlannerInfo *root, return false; } +/* + * Returns true if it's safe to push down a sort as described by 'pathkey' to + * the foreign server + */ +bool +is_foreign_pathkey(PlannerInfo *root, + RelOptInfo *baserel, + PathKey *pathkey) +{ + EquivalenceClass *pathkey_ec = pathkey->pk_eclass; + PgFdwRelationInfo *fpinfo = (PgFdwRelationInfo *) baserel->fdw_private; + Expr *em_expr; + + /* + * is_foreign_expr would detect volatile expressions as well, but checking + * ec_has_volatile here saves some cycles. + */ + if (pathkey_ec->ec_has_volatile) + return false; + + /* can't push down the sort if the pathkey's opfamily is not shippable */ + if (!is_shippable(pathkey->pk_opfamily, OperatorFamilyRelationId, fpinfo)) + return false; + + em_expr = find_em_expr_for_rel(pathkey_ec, baserel); + if (em_expr == NULL) + return false; + + /* + * Finally, determine if it's safe to evaluate the found expr on the + * foreign server. + */ + return is_foreign_expr(root, baserel, em_expr); +} + /* * Convert type OID + typmod info into a type name we can ship to the remote * server. Someplace else had better have verified that this type name is @@ -3331,6 +3371,45 @@ deparseAggref(Aggref *node, deparse_expr_cxt *context) appendStringInfoChar(buf, ')'); } +/* + * Append the ASC, DESC, USING <OPERATOR> and NULLS FIRST / NULLS LAST part + * of the ORDER BY clause + */ +static void +appendOrderBySuffix(Oid sortop, Oid sortcoltype, bool nulls_first, + deparse_expr_cxt *context) +{ + StringInfo buf = context->buf; + TypeCacheEntry *typentry; + + typentry = lookup_type_cache(sortcoltype, TYPECACHE_LT_OPR | TYPECACHE_GT_OPR); + + if (sortop == typentry->lt_opr) + appendStringInfoString(buf, " ASC"); + else if (sortop == typentry->gt_opr) + appendStringInfoString(buf, " DESC"); + else + { + HeapTuple opertup; + Form_pg_operator operform; + + appendStringInfoString(buf, " USING "); + + /* Append operator name. */ + opertup = SearchSysCache1(OPEROID, ObjectIdGetDatum(sortop)); + if (!HeapTupleIsValid(opertup)) + elog(ERROR, "cache lookup failed for operator %u", sortop); + operform = (Form_pg_operator) GETSTRUCT(opertup); + deparseOperatorName(buf, operform); + ReleaseSysCache(opertup); + } + + if (nulls_first) + appendStringInfoString(buf, " NULLS FIRST"); + else + appendStringInfoString(buf, " NULLS LAST"); +} + /* * Append ORDER BY within aggregate function. */ @@ -3346,7 +3425,6 @@ appendAggOrderBy(List *orderList, List *targetList, deparse_expr_cxt *context) SortGroupClause *srt = (SortGroupClause *) lfirst(lc); Node *sortexpr; Oid sortcoltype; - TypeCacheEntry *typentry; if (!first) appendStringInfoString(buf, ", "); @@ -3356,32 +3434,8 @@ appendAggOrderBy(List *orderList, List *targetList, deparse_expr_cxt *context) false, context); sortcoltype = exprType(sortexpr); /* See whether operator is default < or > for datatype */ - typentry = lookup_type_cache(sortcoltype, - TYPECACHE_LT_OPR | TYPECACHE_GT_OPR); - if (srt->sortop == typentry->lt_opr) - appendStringInfoString(buf, " ASC"); - else if (srt->sortop == typentry->gt_opr) - appendStringInfoString(buf, " DESC"); - else - { - HeapTuple opertup; - Form_pg_operator operform; - - appendStringInfoString(buf, " USING "); - - /* Append operator name. */ - opertup = SearchSysCache1(OPEROID, ObjectIdGetDatum(srt->sortop)); - if (!HeapTupleIsValid(opertup)) - elog(ERROR, "cache lookup failed for operator %u", srt->sortop); - operform = (Form_pg_operator) GETSTRUCT(opertup); - deparseOperatorName(buf, operform); - ReleaseSysCache(opertup); - } - - if (srt->nulls_first) - appendStringInfoString(buf, " NULLS FIRST"); - else - appendStringInfoString(buf, " NULLS LAST"); + appendOrderBySuffix(srt->sortop, sortcoltype, srt->nulls_first, + context); } } @@ -3486,7 +3540,11 @@ appendOrderByClause(List *pathkeys, bool has_final_sort, foreach(lcell, pathkeys) { PathKey *pathkey = lfirst(lcell); + EquivalenceMember *em; Expr *em_expr; + HeapTuple tuple; + Oid oprid; + bool isNull; if (has_final_sort) { @@ -3494,26 +3552,44 @@ appendOrderByClause(List *pathkeys, bool has_final_sort, * By construction, context->foreignrel is the input relation to * the final sort. */ - em_expr = find_em_expr_for_input_target(context->root, - pathkey->pk_eclass, - context->foreignrel->reltarget); + em = find_em_for_input_target(context->root, + pathkey->pk_eclass, + context->foreignrel->reltarget); } else - em_expr = find_em_expr_for_rel(pathkey->pk_eclass, baserel); + em = find_em_for_rel(pathkey->pk_eclass, baserel); + + em_expr = em->em_expr; + + /* + * Lookup the operator corresponding to the strategy in the opclass. + * The datatype used by the opfamily is not necessarily the same as + * the expression type (for array types for example). + */ + tuple = SearchSysCache4(AMOPSTRATEGY, + ObjectIdGetDatum(pathkey->pk_opfamily), + ObjectIdGetDatum(em->em_datatype), + ObjectIdGetDatum(em->em_datatype), + Int16GetDatum(pathkey->pk_strategy)); + + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "missing operator %d(%u,%u) in opfamily %u", + pathkey->pk_strategy, em->em_datatype, em->em_datatype, + pathkey->pk_opfamily); - Assert(em_expr != NULL); + oprid = DatumGetObjectId(SysCacheGetAttr(AMOPSTRATEGY, tuple, + Anum_pg_amop_amopopr, &isNull)); + ReleaseSysCache(tuple); appendStringInfoString(buf, delim); deparseExpr(em_expr, context); - if (pathkey->pk_strategy == BTLessStrategyNumber) - appendStringInfoString(buf, " ASC"); - else - appendStringInfoString(buf, " DESC"); - if (pathkey->pk_nulls_first) - appendStringInfoString(buf, " NULLS FIRST"); - else - appendStringInfoString(buf, " NULLS LAST"); + /* + * Here we need to use the expression type to compare against the + * default btree sort operator. + */ + appendOrderBySuffix(oprid, exprType((Node *) em_expr), + pathkey->pk_nulls_first, context); delim = ", "; } diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out index e3ee30f1aa..1f44c365ef 100644 --- a/contrib/postgres_fdw/expected/postgres_fdw.out +++ b/contrib/postgres_fdw/expected/postgres_fdw.out @@ -3258,6 +3258,19 @@ select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6 Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE (("C 1" < 100)) AND ((c2 = 6)) (6 rows) +-- Ensure that we don't push down an ORDER BY with a non-shippable operator +explain (verbose, costs off) +select * from ft2 order by c1 using operator(public.<^); + QUERY PLAN +------------------------------------------------------------------------------- + Sort + Output: c1, c2, c3, c4, c5, c6, c7, c8 + Sort Key: ft2.c1 USING <^ + -> Foreign Scan on public.ft2 + Output: c1, c2, c3, c4, c5, c6, c7, c8 + Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" +(6 rows) + -- Update local stats on ft2 ANALYZE ft2; -- Add into extension @@ -3285,6 +3298,16 @@ select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6 {6,16,26,36,46,56,66,76,86,96} (1 row) +-- Ensure that the ORDER BY is pushed to the foreign server +explain (verbose, costs off) +select * from ft2 order by c1 using operator(public.<^); + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------- + Foreign Scan on public.ft2 + Output: c1, c2, c3, c4, c5, c6, c7, c8 + Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" ORDER BY "C 1" USING OPERATOR(public.<^) NULLS LAST +(3 rows) + -- Remove from extension alter extension postgres_fdw drop operator class my_op_class using btree; alter extension postgres_fdw drop function my_op_cmp(a int, b int); diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c index 4bdab30a73..1dd4779185 100644 --- a/contrib/postgres_fdw/postgres_fdw.c +++ b/contrib/postgres_fdw/postgres_fdw.c @@ -917,8 +917,6 @@ get_useful_pathkeys_for_relation(PlannerInfo *root, RelOptInfo *rel) foreach(lc, root->query_pathkeys) { PathKey *pathkey = (PathKey *) lfirst(lc); - EquivalenceClass *pathkey_ec = pathkey->pk_eclass; - Expr *em_expr; /* * The planner and executor don't have any clever strategy for @@ -926,13 +924,8 @@ get_useful_pathkeys_for_relation(PlannerInfo *root, RelOptInfo *rel) * getting it to be sorted by all of those pathkeys. We'll just * end up resorting the entire data set. So, unless we can push * down all of the query pathkeys, forget it. - * - * is_foreign_expr would detect volatile expressions as well, but - * checking ec_has_volatile here saves some cycles. */ - if (pathkey_ec->ec_has_volatile || - !(em_expr = find_em_expr_for_rel(pathkey_ec, rel)) || - !is_foreign_expr(root, rel, em_expr)) + if (!is_foreign_pathkey(root, rel, pathkey)) { query_pathkeys_ok = false; break; @@ -6540,9 +6533,9 @@ add_foreign_ordered_paths(PlannerInfo *root, RelOptInfo *input_rel, return; /* Get the sort expression for the pathkey_ec */ - sort_expr = find_em_expr_for_input_target(root, - pathkey_ec, - input_rel->reltarget); + sort_expr = find_em_for_input_target(root, + pathkey_ec, + input_rel->reltarget)->em_expr; /* If it's unsafe to remote, we cannot push down the final sort */ if (!is_foreign_expr(root, input_rel, sort_expr)) @@ -7332,13 +7325,12 @@ conversion_error_callback(void *arg) } /* - * Find an equivalence class member expression to be computed as a sort column - * in the given target. + * Find an equivalence class member to be computed as a sort column in the + * given target. */ -Expr * -find_em_expr_for_input_target(PlannerInfo *root, - EquivalenceClass *ec, - PathTarget *target) +EquivalenceMember * +find_em_for_input_target(PlannerInfo *root, EquivalenceClass *ec, + PathTarget *target) { ListCell *lc1; int i; @@ -7383,7 +7375,7 @@ find_em_expr_for_input_target(PlannerInfo *root, em_expr = ((RelabelType *) em_expr)->arg; if (equal(em_expr, expr)) - return em->em_expr; + return em; } i++; diff --git a/contrib/postgres_fdw/postgres_fdw.h b/contrib/postgres_fdw/postgres_fdw.h index ca83306af9..9930226b6f 100644 --- a/contrib/postgres_fdw/postgres_fdw.h +++ b/contrib/postgres_fdw/postgres_fdw.h @@ -171,6 +171,9 @@ extern bool is_foreign_expr(PlannerInfo *root, extern bool is_foreign_param(PlannerInfo *root, RelOptInfo *baserel, Expr *expr); +extern bool is_foreign_pathkey(PlannerInfo *root, + RelOptInfo *baserel, + PathKey *pathkey); extern void deparseInsertSql(StringInfo buf, RangeTblEntry *rte, Index rtindex, Relation rel, List *targetAttrs, bool doNothing, @@ -213,10 +216,10 @@ extern void deparseTruncateSql(StringInfo buf, DropBehavior behavior, bool restart_seqs); extern void deparseStringLiteral(StringInfo buf, const char *val); -extern Expr *find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel); -extern Expr *find_em_expr_for_input_target(PlannerInfo *root, - EquivalenceClass *ec, - PathTarget *target); +extern EquivalenceMember *find_em_for_rel(EquivalenceClass *ec, RelOptInfo *rel); +extern EquivalenceMember *find_em_for_input_target(PlannerInfo *root, + EquivalenceClass *ec, + PathTarget *target); extern List *build_tlist_to_deparse(RelOptInfo *foreignrel); extern void deparseSelectStmtForRel(StringInfo buf, PlannerInfo *root, RelOptInfo *foreignrel, List *tlist, diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql index 30b5175da5..1bbe1212c4 100644 --- a/contrib/postgres_fdw/sql/postgres_fdw.sql +++ b/contrib/postgres_fdw/sql/postgres_fdw.sql @@ -902,6 +902,10 @@ create operator class my_op_class for type int using btree family my_op_family a explain (verbose, costs off) select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6 and c1 < 100 group by c2; +-- Ensure that we don't push down an ORDER BY with a non-shippable operator +explain (verbose, costs off) +select * from ft2 order by c1 using operator(public.<^); + -- Update local stats on ft2 ANALYZE ft2; @@ -919,6 +923,10 @@ explain (verbose, costs off) select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6 and c1 < 100 group by c2; select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6 and c1 < 100 group by c2; +-- Ensure that the ORDER BY is pushed to the foreign server +explain (verbose, costs off) +select * from ft2 order by c1 using operator(public.<^); + -- Remove from extension alter extension postgres_fdw drop operator class my_op_class using btree; alter extension postgres_fdw drop function my_op_cmp(a int, b int); diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c index 6f1abbe47d..b367f18eab 100644 --- a/src/backend/optimizer/path/equivclass.c +++ b/src/backend/optimizer/path/equivclass.c @@ -932,11 +932,11 @@ is_exprlist_member(Expr *node, List *exprs) } /* - * Find an equivalence class member expression, all of whose Vars, come from - * the indicated relation. + * Find an equivalence class member, all of whose Vars, come from the + * indicated relation. */ -Expr * -find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel) +EquivalenceMember * +find_em_for_rel(EquivalenceClass *ec, RelOptInfo *rel) { ListCell *lc_em; @@ -952,7 +952,7 @@ find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel) * taken entirely from this relation, we'll be content to choose * any one of those. */ - return em->em_expr; + return em; } } @@ -960,6 +960,22 @@ find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel) return NULL; } +/* + * Find an equivalence class member expression, all of whose Vars, come from + * the indicated relation. + */ +Expr * +find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel) +{ + EquivalenceMember *em = find_em_for_rel(ec, rel); + + if (em != NULL) + return em->em_expr; + + return NULL; +} + + /* * relation_can_be_sorted_early * Can this relation be sorted on this EC before the final output step? diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index f1d111063c..5a2bbc87e1 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -144,6 +144,8 @@ extern EquivalenceMember *find_computable_ec_member(PlannerInfo *root, Relids relids, bool require_parallel_safe); extern Expr *find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel); +extern EquivalenceMember *find_em_for_rel(EquivalenceClass *ec, + RelOptInfo *rel); extern bool relation_can_be_sorted_early(PlannerInfo *root, RelOptInfo *rel, EquivalenceClass *ec, bool require_parallel_safe); -- 2.33.0