On Wed, Aug 26, 2015 at 5:28 PM, Tom Lane <t...@sss.pgh.pa.us> wrote: > > After looking at the code a bit, IMO the most reasonable thing to do is to > include this transformation in inline_set_returning_functions(), perhaps > renaming it to something like inline_srfs_and_ctes(). >
This is essentially the same as my current implementation (revised patch attached): 1. There are two call sites of inline_set_returning_functions(), and one place is guarded with Assert(subquery->cteList == NIL). This means transformation in subquery_planner() is effective. 2. A problem with revised patch is that we can't get rid of non-used CTEs show up in EXPLAIN. IMHO, here the problem is not "multiple levels" but "multiple references". "levels" is handled well by recursion but references are not: set returning function seems does not have the this issue because you don't define a function along the query. Regards, Qingqing --- Two testing queries results with revised patch: 1. Extra CTE q and p prints in EXPLAIN: postgres=# explain WITH q AS ( postgres(# WITH p AS (SELECT * from a) SELECT p.* FROM p JOIN p p1 on p.i>=p1.i) postgres-# SELECT * FROM q WHERE i <= 5; QUERY PLAN ----------------------------------------------------------------------------------------- Nested Loop (cost=1443.59..7423.16 rows=133333 width=8) CTE q -> Nested Loop (cost=1443.29..91700762.00 rows=3333333333 width=8) CTE p -> Seq Scan on a a_2 (cost=0.00..1443.00 rows=100000 width=8) -> Seq Scan on a a_3 (cost=0.00..1443.00 rows=100000 width=8) -> Index Only Scan using ai on a a_4 (cost=0.29..583.65 rows=33333 width=4) Index Cond: (i <= a_3.i) CTE p -> Seq Scan on a a_5 (cost=0.00..1443.00 rows=100000 width=8) -> Index Scan using ai on a (cost=0.29..8.36 rows=4 width=8) Index Cond: (i <= 5) -> Index Only Scan using ai on a a_1 (cost=0.29..1159.62 rows=33333 width=4) Index Cond: (i <= a.i) (14 rows) 2. Extra m1 show up and same problem still there: postgres=# explain WITH q AS ( postgres(# WITH p AS (SELECT * from a) SELECT p.* FROM p JOIN p p1 on postgres(# p.i>=p1.i), m as (select * from q), m1 as (select * from m) postgres-# SELECT * FROM m1 WHERE i <= 5; QUERY PLAN ----------------------------------------------------------------------------------------- CTE Scan on m (cost=225034095.32..300034095.31 rows=1111111111 width=8) Filter: (i <= 5) CTE q -> Nested Loop (cost=1443.29..91700762.00 rows=3333333333 width=8) CTE p -> Seq Scan on a (cost=0.00..1443.00 rows=100000 width=8) -> Seq Scan on a a_1 (cost=0.00..1443.00 rows=100000 width=8) -> Index Only Scan using ai on a a_2 (cost=0.29..583.65 rows=33333 width=4) Index Cond: (i <= a_1.i) CTE m -> CTE Scan on q (cost=0.00..66666666.66 rows=3333333333 width=8) CTE m1 -> CTE Scan on m m_1 (cost=0.00..66666666.66 rows=3333333333 width=8) (13 rows)
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c new file mode 100644 index 7069f60..7b49816 *** a/src/backend/optimizer/path/costsize.c --- b/src/backend/optimizer/path/costsize.c *************** bool enable_nestloop = true; *** 118,123 **** --- 118,124 ---- bool enable_material = true; bool enable_mergejoin = true; bool enable_hashjoin = true; + bool enable_cte_subquery = true; typedef struct { diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c new file mode 100644 index 401ba5b..58698ab *** a/src/backend/optimizer/prep/prepjointree.c --- b/src/backend/optimizer/prep/prepjointree.c *************** *** 31,36 **** --- 31,37 ---- #include "optimizer/prep.h" #include "optimizer/subselect.h" #include "optimizer/tlist.h" + #include "optimizer/cost.h" #include "parser/parse_relation.h" #include "parser/parsetree.h" #include "rewrite/rewriteManip.h" *************** static void fix_append_rel_relids(List * *** 116,122 **** Relids subrelids); static Node *find_jointree_node_for_rel(Node *jtnode, int relid); - /* * pull_up_sublinks * Attempt to pull up ANY and EXISTS SubLinks to be treated as --- 117,122 ---- *************** inline_set_returning_functions(PlannerIn *** 577,583 **** { RangeTblEntry *rte = (RangeTblEntry *) lfirst(rt); ! if (rte->rtekind == RTE_FUNCTION) { Query *funcquery; --- 577,599 ---- { RangeTblEntry *rte = (RangeTblEntry *) lfirst(rt); ! if (rte->rtekind == RTE_CTE) ! { ! Query *subquery; ! ! if (!enable_cte_subquery) ! continue; ! ! /* Check safety of expansion, and expand if possible */ ! subquery = substitute_cte_with_subquery(root, rte); ! if (subquery) ! { ! /* Successful expansion, replace the rtable entry */ ! rte->rtekind = RTE_SUBQUERY; ! rte->subquery = subquery; ! } ! } ! else if (rte->rtekind == RTE_FUNCTION) { Query *funcquery; diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c new file mode 100644 index c72dbef..274c0bb *** a/src/backend/optimizer/util/clauses.c --- b/src/backend/optimizer/util/clauses.c *************** evaluate_expr(Expr *expr, Oid result_typ *** 4677,4682 **** --- 4677,4728 ---- resultTypByVal); } + /* + * substitute_cte_with_subquery + * Attempt to sustitute a CTE with a subquery. + * + */ + Query * + substitute_cte_with_subquery(PlannerInfo *root, RangeTblEntry *rte) + { + List *cteList; + ListCell *lc; + + Assert(rte->rtekind == RTE_CTE); + + /* + * Lookup matching RTE by name in current level's CTE list. + */ + cteList = root->parse->cteList; + foreach(lc, cteList) + { + CommonTableExpr *cte = (CommonTableExpr *) lfirst(lc); + Query *query = (Query *)cte->ctequery; + + /* Not interested in unused CTE */ + if (!cte->cterefcount) + continue; + + /* Do not convert non-select, with-recursive CTE */ + if (query->commandType != CMD_SELECT || cte->cterecursive) + continue; + + if (!strcmp(cte->ctename, rte->ctename)) + { + /* + * Expanding volatile function could cause multiple evaluation of the + * function, so we can't do it. Also, volatile check is expensive so arrange + * it after name check. + */ + if (contain_volatile_functions((Node *)query)) + continue; + + return query; + } + } + + return NULL; + } /* * inline_set_returning_function diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c new file mode 100644 index b3dac51..7f4e2bd *** a/src/backend/utils/misc/guc.c --- b/src/backend/utils/misc/guc.c *************** static struct config_bool ConfigureNames *** 873,878 **** --- 873,887 ---- NULL, NULL, NULL }, { + {"enable_cte_subquery", PGC_USERSET, QUERY_TUNING_METHOD, + gettext_noop("Enables the planner subsitutes CTEs with subqueries."), + NULL + }, + &enable_cte_subquery, + true, + NULL, NULL, NULL + }, + { {"geqo", PGC_USERSET, QUERY_TUNING_GEQO, gettext_noop("Enables genetic query optimization."), gettext_noop("This algorithm attempts to do planning without " diff --git a/src/include/optimizer/clauses.h b/src/include/optimizer/clauses.h new file mode 100644 index 3d04ac2..27a7227 *** a/src/include/optimizer/clauses.h --- b/src/include/optimizer/clauses.h *************** extern Node *estimate_expression_value(P *** 84,88 **** --- 84,89 ---- extern Query *inline_set_returning_function(PlannerInfo *root, RangeTblEntry *rte); + extern Query *substitute_cte_with_subquery(PlannerInfo *root, RangeTblEntry *rte); #endif /* CLAUSES_H */ diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h new file mode 100644 index dd43e45..58a146b *** a/src/include/optimizer/cost.h --- b/src/include/optimizer/cost.h *************** extern bool enable_nestloop; *** 61,66 **** --- 61,67 ---- extern bool enable_material; extern bool enable_mergejoin; extern bool enable_hashjoin; + extern bool enable_cte_subquery; extern int constraint_exclusion; extern double clamp_row_est(double nrows);
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers