On 5/10/2022 02:45, Zhihong Yu wrote:
Hi,
For contain_placeholders():
+ if (IsA(node, Query))
+ return query_tree_walker((Query *) node, contain_placeholders,
context, 0);
+ else if (IsA(node, PlaceHolderVar))
Fixed
The `else` is not needed.
For correlated_t struct, it would be better if the fields have comments.
Ok, I've added some comments.
+ * (for grouping, as an example). So, revert its
status to
+ * a full valued entry.
full valued -> fully valued
Fixed
--
regards,
Andrey Lepikhov
Postgres Professional
From da570914e53bc34e6bf3291649725a041a8b929c Mon Sep 17 00:00:00 2001
From: "Andrey V. Lepikhov" <a.lepik...@postgrespro.ru>
Date: Wed, 28 Sep 2022 13:42:12 +0300
Subject: [PATCH] Transform correlated subquery of type N-J [1] into ordinary
join query. With many restrictions, check each subquery and pull up
expressions, references upper query block. Works for operators '=' and 'IN'.
Machinery of converting ANY subquery to JOIN stays the same with minor
changes in walker function.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Pass a lower outer join through the pullup_sublink recursive procedure to find
out some situations when qual references outer side of an outer join.
[1] Kim, Won. “On optimizing an SQL-like nested query.” ACM Trans. Database
Syst. 7 (1982): 443-469.
---
src/backend/optimizer/plan/subselect.c | 328 +++++++++++++++-
src/backend/optimizer/prep/prepjointree.c | 71 ++--
src/backend/optimizer/util/tlist.c | 2 +-
src/backend/optimizer/util/var.c | 8 +
src/backend/utils/misc/guc_tables.c | 10 +
src/include/optimizer/optimizer.h | 1 +
src/include/optimizer/subselect.h | 3 +-
src/include/optimizer/tlist.h | 1 +
src/test/regress/expected/prepare.out | 18 +
src/test/regress/expected/subselect.out | 438 ++++++++++++++++++++++
src/test/regress/sql/prepare.sql | 7 +
src/test/regress/sql/subselect.sql | 162 ++++++++
src/tools/pgindent/typedefs.list | 1 +
13 files changed, 1013 insertions(+), 37 deletions(-)
diff --git a/src/backend/optimizer/plan/subselect.c
b/src/backend/optimizer/plan/subselect.c
index 92e3338584..be19d85524 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -32,6 +32,7 @@
#include "optimizer/planner.h"
#include "optimizer/prep.h"
#include "optimizer/subselect.h"
+#include "optimizer/tlist.h"
#include "parser/parse_relation.h"
#include "rewrite/rewriteManip.h"
#include "utils/builtins.h"
@@ -65,6 +66,8 @@ typedef struct inline_cte_walker_context
} inline_cte_walker_context;
+bool optimize_correlated_subqueries = true;
+
static Node *build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot,
List *plan_params,
SubLinkType subLinkType, int
subLinkId,
@@ -1229,6 +1232,296 @@ inline_cte_walker(Node *node, inline_cte_walker_context
*context)
return expression_tree_walker(node, inline_cte_walker, context);
}
+static bool
+contain_placeholders(Node *node, inline_cte_walker_context *context)
+{
+ if (node == NULL)
+ return false;
+
+ if (IsA(node, Query))
+ return query_tree_walker((Query *) node, contain_placeholders,
context, 0);
+ if (IsA(node, PlaceHolderVar))
+ return true;
+
+ return expression_tree_walker(node, contain_placeholders, context);
+}
+
+/*
+ * To be pullable all clauses of flattening correlated subquery should be
+ * anded and mergejoinable (XXX: really necessary?)
+ */
+static bool
+quals_is_pullable(Node *quals)
+{
+ if (!contain_vars_of_level(quals, 1))
+ return true;
+
+ if (quals && IsA(quals, OpExpr))
+ {
+ OpExpr *expr = (OpExpr *) quals;
+ Node *leftarg;
+
+ /* Contains only one expression */
+ leftarg = linitial(expr->args);
+ if (!op_mergejoinable(expr->opno, exprType(leftarg))) /* Is it
really necessary ? */
+ return false;
+
+ if (contain_placeholders(quals, NULL))
+ return false;
+
+ return true;
+ }
+ else if (is_andclause(quals))
+ {
+ ListCell *l;
+
+ foreach(l, ((BoolExpr *) quals)->args)
+ {
+ Node *andarg = (Node *) lfirst(l);
+
+ if (!IsA(andarg, OpExpr))
+ return false;
+ if (!quals_is_pullable(andarg))
+ return false;
+ }
+
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * Mutator context used in pull-up process of expressions from WHERE quals.
+ */
+typedef struct
+{
+ Query *subquery; /* Link to the subquery which we are trying to pull
up */
+ List *pulling_quals; /* List of expressions contained pulled
expressions */
+ int newvarno; /* Index or range table entry which is a
source of pulling
+ varno */
+ bool varlevel_up; /* true if a pulling expression is being processed,
+ false otherwise. */
+} correlated_t;
+
+/*
+ * Pull up expressions, containing a link to upper relation. In the qual it
will
+ * be replaced by TRUE const. In this expression, all links to upper levels
will
+ * be arranged by one level.
+ */
+static Node *
+pull_subquery_clauses_mutator(Node *node, correlated_t *ctx)
+{
+ if (node == NULL)
+ return NULL;
+
+ if (IsA(node, OpExpr) && !ctx->varlevel_up)
+ {
+ if (!contain_vars_of_level(node, 1))
+ return node;
+
+ /*
+ * The expression contains links to upper relation. It will be
pulled to
+ * uplevel. All links into vars of upper levels must be changed.
+ */
+
+ ctx->varlevel_up = true;
+ ctx->pulling_quals =
+ lappend(ctx->pulling_quals,
+ expression_tree_mutator(node,
+
pull_subquery_clauses_mutator,
+
(void *) ctx));
+ ctx->varlevel_up = false;
+
+ /* Replace position of pulled expression by the 'true' value */
+ return makeBoolConst(true, false);
+ }
+ if (IsA(node, PlaceHolderVar))
+ {
+ PlaceHolderVar *phv = (PlaceHolderVar *) node;
+
+ if (ctx->varlevel_up && phv->phlevelsup > 0)
+ phv->phlevelsup--;
+ /* fall through to recurse into argument */
+ }
+ else if (IsA(node, RangeTblEntry))
+ {
+ RangeTblEntry *rte = (RangeTblEntry *) node;
+
+ if (rte->rtekind == RTE_CTE)
+ {
+ if (rte->ctelevelsup > 0 && ctx->varlevel_up)
+ rte->ctelevelsup--;
+ }
+ return node;
+ }
+ else if (IsA(node, Aggref))
+ {
+ if (((Aggref *) node)->agglevelsup > 0 && ctx->varlevel_up)
+ ((Aggref *) node)->agglevelsup--;
+ return node;
+ }
+ else if (IsA(node, GroupingFunc))
+ {
+ if (((GroupingFunc *) node)->agglevelsup > 0 &&
ctx->varlevel_up)
+ ((GroupingFunc *) node)->agglevelsup--;
+ return node;
+ }
+ else if (IsA(node, Var))
+ {
+ Var *var = (Var *) node;
+
+ Assert(ctx->varlevel_up);
+
+ /* An upper relation variable */
+ if (var->varlevelsup > 0)
+ {
+ /*
+ * Isn't needed to copy node or change varno because it
correctly
+ * refers to Table Entry of a parent and already
removed from
+ * the subquery clauses list.
+ */
+ var->varlevelsup--;
+
+ return (Node *) var;
+ }
+ else
+ {
+ Var *newvar;
+ TargetEntry *tle;
+
+ /*
+ * The var refers to subquery table entry. Include a
copy the var
+ * into the target list, if necessary. Arrange varattno
of the
+ * new var of upper relation with a link to this entry.
+ */
+
+ /* Create a var for usage in upper relation */
+ newvar = (Var *) copyObject(node);
+
+ /* Does the var already exists in the target list? */
+ tle = tlist_member_match_var(var,
ctx->subquery->targetList);
+
+ if (tle == NULL)
+ {
+ int resno =
list_length(ctx->subquery->targetList) + 1;
+
+ /*
+ * Target list of the subquery doesn't contain
this var. Add it
+ * into the end of the target list and correct
the link
+ * XXX: Maybe choose real colname here?
+ */
+ tle = makeTargetEntry((Expr *) var, resno,
"rescol", false);
+ ctx->subquery->targetList =
lappend(ctx->subquery->targetList,
+
tle);
+ }
+ else
+ {
+ if (tle->resjunk)
+ {
+ /*
+ * Target entry exists but used as an
utility entry
+ * (for grouping, as an example). So,
revert its status to
+ * a fully valued entry.
+ */
+ tle->resjunk = false;
+ tle->resname = pstrdup("resjunkcol");
+ }
+ }
+
+ /*
+ * Set the new var to refer newly created RangeTblEntry
in the upper
+ * query and varattno to refer at specific position in
the target
+ * list.
+ */
+ newvar->varno = ctx->newvarno;
+ newvar->varattno = tle->resno;
+
+ return (Node *) newvar;
+ }
+ }
+ if (IsA(node, Query))
+ return (Node *) query_tree_mutator((Query *) node,
+
pull_subquery_clauses_mutator,
+
(void *) ctx, 0);
+
+ return expression_tree_mutator(node, pull_subquery_clauses_mutator,
+ (void *)
ctx);
+}
+
+static List *
+pull_correlated_clauses(PlannerInfo *root, SubLink *sublink,
+ JoinExpr *lowest_outer_join)
+{
+ Query *parse = root->parse;
+ Query *subselect = (Query *) sublink->subselect;
+ FromExpr *f;
+ correlated_t ctx = {.subquery = subselect,
+ .newvarno =
list_length(parse->rtable) + 1, /* Looks like a hack */
+ .pulling_quals = NIL,
+ .varlevel_up = false};
+ Relids safe_upper_varnos = NULL;
+
+ Assert(IsA(subselect, Query));
+
+ /* Use only for correlated candidates, just for optimal usage */
+ Assert(contain_vars_of_level((Node *) subselect, 1));
+
+ if (!optimize_correlated_subqueries ||
+ subselect->hasAggs ||
+ subselect->hasWindowFuncs ||
+ subselect->hasForUpdate || /* Pulling of clauses can change a
number of tuples which subselect returns. */
+ subselect->hasRowSecurity /* Just because of paranoid safety */
+ )
+ /* The feature is switched off. */
+ return NULL;
+
+ /*
+ * We pull up quals and arrange variable levels for expressions in WHERE
+ * section only. So, cut the optimization off if an upper relation links
+ * from another parts of the subquery are detected.
+ */
+ if (contain_vars_of_level((Node *) subselect->cteList, 1) ||
+ /* see comments in subselect.sql */
+ contain_vars_of_level((Node *) subselect->rtable, 1) ||
+ contain_vars_of_level((Node *) subselect->targetList, 1) ||
+ contain_vars_of_level((Node *) subselect->returningList, 1) ||
+ contain_vars_of_level((Node *) subselect->groupingSets, 1) ||
+ contain_vars_of_level((Node *) subselect->distinctClause, 1) ||
+ contain_vars_of_level((Node *) subselect->sortClause, 1) ||
+ contain_vars_of_level((Node *) subselect->limitOffset, 1) ||
+ contain_vars_of_level((Node *) subselect->limitCount, 1) ||
+ contain_vars_of_level((Node *) subselect->rowMarks, 1) ||
+ contain_vars_of_level((Node *) subselect->havingQual, 1) ||
+ contain_vars_of_level((Node *) subselect->groupClause, 1))
+ return NULL;
+
+ f = subselect->jointree;
+
+ if (!f || !f->quals || !quals_is_pullable(f->quals))
+ return NULL;
+
+ if (lowest_outer_join)
+ safe_upper_varnos = get_relids_in_jointree(
+ (lowest_outer_join->jointype == JOIN_RIGHT) ?
+ lowest_outer_join->larg :
lowest_outer_join->rarg, true);
+
+ if (safe_upper_varnos &&
+ !bms_is_subset(pull_varnos_of_level(root, f->quals, 1),
+ safe_upper_varnos))
+ return NULL;
+
+ /*
+ * Now, is proved that it is possible to pull up expressions with
variables
+ * from the upper query.
+ * Pull up quals, containing correlated expressions. Replace its
+ * positions with a true boolean expression.
+ * It would be removed on a next planning stage.
+ */
+ f->quals = pull_subquery_clauses_mutator(f->quals, (void *) &ctx);
+
+ return ctx.pulling_quals;
+}
/*
* convert_ANY_sublink_to_join: try to convert an ANY SubLink to a join
@@ -1266,7 +1559,7 @@ inline_cte_walker(Node *node, inline_cte_walker_context
*context)
*/
JoinExpr *
convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink,
- Relids available_rels)
+ Relids available_rels,
JoinExpr *lowest_outer_join)
{
JoinExpr *result;
Query *parse = root->parse;
@@ -1279,16 +1572,10 @@ convert_ANY_sublink_to_join(PlannerInfo *root, SubLink
*sublink,
List *subquery_vars;
Node *quals;
ParseState *pstate;
+ List *pclauses = NIL;
Assert(sublink->subLinkType == ANY_SUBLINK);
- /*
- * The sub-select must not refer to any Vars of the parent query. (Vars
of
- * higher levels should be okay, though.)
- */
- if (contain_vars_of_level((Node *) subselect, 1))
- return NULL;
-
/*
* The test expression must contain some Vars of the parent query, else
* it's not gonna be a join. (Note that it won't have Vars referring to
@@ -1310,6 +1597,17 @@ convert_ANY_sublink_to_join(PlannerInfo *root, SubLink
*sublink,
if (contain_volatile_functions(sublink->testexpr))
return NULL;
+ /*
+ * The sub-select must not refer to any Vars of the parent query. (Vars
of
+ * higher levels should be okay, though.)
+ * In the case of correlated subquery, jointree quals structure will be
+ * modified: expressions with variables from upper query moves to the
+ * pulled_clauses list, their places in the quals replaces by "true"
value.
+ */
+ if (contain_vars_of_level((Node *) subselect, 1) &&
+ (pclauses = pull_correlated_clauses(root, sublink,
lowest_outer_join)) == NIL)
+ return NULL;
+
/* Create a dummy ParseState for addRangeTableEntryForSubquery */
pstate = make_parsestate(NULL);
@@ -1348,6 +1646,20 @@ convert_ANY_sublink_to_join(PlannerInfo *root, SubLink
*sublink,
*/
quals = convert_testexpr(root, sublink->testexpr, subquery_vars);
+ /* Nested subquery with references to upper level relation. */
+ if (pclauses != NIL)
+ {
+ /* Add clauses, pulled from subquery into WHERE section of the
parent. */
+ if (IsA(quals, BoolExpr))
+ {
+ BoolExpr *b = (BoolExpr *) quals;
+ b->args = list_concat(b->args, pclauses);
+ }
+ else
+ quals = (Node *) make_andclause(
+
list_concat(list_make1(quals), pclauses));
+ }
+
/*
* And finally, build the JoinExpr node.
*/
diff --git a/src/backend/optimizer/prep/prepjointree.c
b/src/backend/optimizer/prep/prepjointree.c
index 41c7066d90..f72b8b1320 100644
--- a/src/backend/optimizer/prep/prepjointree.c
+++ b/src/backend/optimizer/prep/prepjointree.c
@@ -62,10 +62,12 @@ typedef struct reduce_outer_joins_state
} reduce_outer_joins_state;
static Node *pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node *jtnode,
-
Relids *relids);
+
Relids *relids,
+
JoinExpr *lowest_outer_join);
static Node *pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
Node **jtlink1, Relids available_rels1,
-
Node **jtlink2, Relids available_rels2);
+
Node **jtlink2, Relids available_rels2,
+
JoinExpr *lowest_outer_join);
static Node *pull_up_subqueries_recurse(PlannerInfo *root, Node *jtnode,
JoinExpr *lowest_outer_join,
JoinExpr *lowest_nulling_outer_join,
@@ -293,7 +295,7 @@ pull_up_sublinks(PlannerInfo *root)
/* Begin recursion through the jointree */
jtnode = pull_up_sublinks_jointree_recurse(root,
(Node *) root->parse->jointree,
-
&relids);
+
&relids, NULL);
/*
* root->parse->jointree must always be a FromExpr, so insert a dummy
one
@@ -313,7 +315,7 @@ pull_up_sublinks(PlannerInfo *root)
*/
static Node *
pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node *jtnode,
- Relids
*relids)
+ Relids
*relids, JoinExpr *lowest_outer_join)
{
if (jtnode == NULL)
{
@@ -343,7 +345,8 @@ pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node
*jtnode,
newchild = pull_up_sublinks_jointree_recurse(root,
lfirst(l),
-
&childrelids);
+
&childrelids,
+
lowest_outer_join);
newfromlist = lappend(newfromlist, newchild);
frelids = bms_join(frelids, childrelids);
}
@@ -354,7 +357,8 @@ pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node
*jtnode,
/* Now process qual --- all children are available for use */
newf->quals = pull_up_sublinks_qual_recurse(root, f->quals,
&jtlink, frelids,
-
NULL, NULL);
+
NULL, NULL,
+
lowest_outer_join);
/*
* Note that the result will be either newf, or a stack of
JoinExprs
@@ -385,9 +389,11 @@ pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node
*jtnode,
/* Recurse to process children and collect their relids */
j->larg = pull_up_sublinks_jointree_recurse(root, j->larg,
-
&leftrelids);
+
&leftrelids,
+
lowest_outer_join);
j->rarg = pull_up_sublinks_jointree_recurse(root, j->rarg,
-
&rightrelids);
+
&rightrelids,
+
lowest_outer_join);
/*
* Now process qual, showing appropriate child relids as
available,
@@ -408,13 +414,14 @@ pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node
*jtnode,
&jtlink,
bms_union(leftrelids,
rightrelids),
-
NULL, NULL);
+
NULL, NULL,
+
lowest_outer_join);
break;
case JOIN_LEFT:
j->quals = pull_up_sublinks_qual_recurse(root,
j->quals,
&j->rarg,
rightrelids,
-
NULL, NULL);
+
NULL, NULL, j);
break;
case JOIN_FULL:
/* can't do anything with full-join quals */
@@ -423,7 +430,7 @@ pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node
*jtnode,
j->quals = pull_up_sublinks_qual_recurse(root,
j->quals,
&j->larg,
leftrelids,
-
NULL, NULL);
+
NULL, NULL, j);
break;
default:
elog(ERROR, "unrecognized join type: %d",
@@ -468,7 +475,8 @@ pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node
*jtnode,
static Node *
pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
Node **jtlink1,
Relids available_rels1,
- Node **jtlink2,
Relids available_rels2)
+ Node **jtlink2,
Relids available_rels2,
+ JoinExpr
*lowest_outer_join)
{
if (node == NULL)
return NULL;
@@ -482,7 +490,8 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
if (sublink->subLinkType == ANY_SUBLINK)
{
if ((j = convert_ANY_sublink_to_join(root, sublink,
-
available_rels1)) != NULL)
+
available_rels1,
+
lowest_outer_join)) != NULL)
{
/* Yes; insert the new join node into the join
tree */
j->larg = *jtlink1;
@@ -490,7 +499,7 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
/* Recursively process pulled-up jointree nodes
*/
j->rarg =
pull_up_sublinks_jointree_recurse(root,
j->rarg,
-
&child_rels);
+
&child_rels, j);
/*
* Now recursively process the pulled-up quals.
Any inserted
@@ -502,13 +511,15 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node
*node,
&j->larg,
available_rels1,
&j->rarg,
-
child_rels);
+
child_rels,
+
j);
/* Return NULL representing constant TRUE */
return NULL;
}
if (available_rels2 != NULL &&
(j = convert_ANY_sublink_to_join(root, sublink,
-
available_rels2)) != NULL)
+
available_rels2,
+
lowest_outer_join)) != NULL)
{
/* Yes; insert the new join node into the join
tree */
j->larg = *jtlink2;
@@ -516,7 +527,7 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
/* Recursively process pulled-up jointree nodes
*/
j->rarg =
pull_up_sublinks_jointree_recurse(root,
j->rarg,
-
&child_rels);
+
&child_rels, j);
/*
* Now recursively process the pulled-up quals.
Any inserted
@@ -528,7 +539,8 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
&j->larg,
available_rels2,
&j->rarg,
-
child_rels);
+
child_rels,
+
j);
/* Return NULL representing constant TRUE */
return NULL;
}
@@ -544,7 +556,7 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
/* Recursively process pulled-up jointree nodes
*/
j->rarg =
pull_up_sublinks_jointree_recurse(root,
j->rarg,
-
&child_rels);
+
&child_rels, j);
/*
* Now recursively process the pulled-up quals.
Any inserted
@@ -556,7 +568,8 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
&j->larg,
available_rels1,
&j->rarg,
-
child_rels);
+
child_rels,
+
j);
/* Return NULL representing constant TRUE */
return NULL;
}
@@ -570,7 +583,7 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
/* Recursively process pulled-up jointree nodes
*/
j->rarg =
pull_up_sublinks_jointree_recurse(root,
j->rarg,
-
&child_rels);
+
&child_rels, j);
/*
* Now recursively process the pulled-up quals.
Any inserted
@@ -582,7 +595,8 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
&j->larg,
available_rels2,
&j->rarg,
-
child_rels);
+
child_rels,
+
j);
/* Return NULL representing constant TRUE */
return NULL;
}
@@ -610,7 +624,7 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
/* Recursively process pulled-up
jointree nodes */
j->rarg =
pull_up_sublinks_jointree_recurse(root,
j->rarg,
-
&child_rels);
+
&child_rels, j);
/*
* Now recursively process the
pulled-up quals. Because
@@ -622,7 +636,8 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
j->quals,
&j->rarg,
child_rels,
-
NULL, NULL);
+
NULL, NULL,
+
j);
/* Return NULL representing constant
TRUE */
return NULL;
}
@@ -636,7 +651,7 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
/* Recursively process pulled-up
jointree nodes */
j->rarg =
pull_up_sublinks_jointree_recurse(root,
j->rarg,
-
&child_rels);
+
&child_rels, j);
/*
* Now recursively process the
pulled-up quals. Because
@@ -648,7 +663,8 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
j->quals,
&j->rarg,
child_rels,
-
NULL, NULL);
+
NULL, NULL,
+
j);
/* Return NULL representing constant
TRUE */
return NULL;
}
@@ -673,7 +689,8 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
jtlink1,
available_rels1,
jtlink2,
-
available_rels2);
+
available_rels2,
+
lowest_outer_join);
if (newclause)
newclauses = lappend(newclauses, newclause);
}
diff --git a/src/backend/optimizer/util/tlist.c
b/src/backend/optimizer/util/tlist.c
index 784a1af82d..5b7aee121f 100644
--- a/src/backend/optimizer/util/tlist.c
+++ b/src/backend/optimizer/util/tlist.c
@@ -98,7 +98,7 @@ tlist_member(Expr *node, List *targetlist)
* This is needed in some cases where we can't be sure of an exact typmod
* match. For safety, though, we insist on vartype match.
*/
-static TargetEntry *
+TargetEntry *
tlist_member_match_var(Var *var, List *targetlist)
{
ListCell *temp;
diff --git a/src/backend/optimizer/util/var.c b/src/backend/optimizer/util/var.c
index 7db86c39ef..54441e692b 100644
--- a/src/backend/optimizer/util/var.c
+++ b/src/backend/optimizer/util/var.c
@@ -461,6 +461,14 @@ contain_vars_of_level_walker(Node *node, int *sublevels_up)
return true; /* abort the tree traversal and
return true */
/* else fall through to check the contained expr */
}
+ if (IsA(node, RangeTblEntry))
+ {
+ RangeTblEntry *rte = (RangeTblEntry *) node;
+
+ /* Someone can call the routine on a field of Query struct */
+ return range_table_entry_walker(rte,
contain_vars_of_level_walker,
+
(void *) sublevels_up, 0);
+ }
if (IsA(node, Query))
{
/* Recurse into subselects */
diff --git a/src/backend/utils/misc/guc_tables.c
b/src/backend/utils/misc/guc_tables.c
index 05ab087934..55791b474b 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -765,6 +765,16 @@ StaticAssertDecl(lengthof(config_type_names) == (PGC_ENUM
+ 1),
struct config_bool ConfigureNamesBool[] =
{
+ {
+ {"optimize_correlated_subqueries", PGC_USERSET,
QUERY_TUNING_METHOD,
+ gettext_noop("optimize_correlated_subqueries."),
+ NULL,
+ GUC_EXPLAIN | GUC_NOT_IN_SAMPLE
+ },
+ &optimize_correlated_subqueries,
+ true,
+ NULL, NULL, NULL
+ },
{
{"enable_seqscan", PGC_USERSET, QUERY_TUNING_METHOD,
gettext_noop("Enables the planner's use of
sequential-scan plans."),
diff --git a/src/include/optimizer/optimizer.h
b/src/include/optimizer/optimizer.h
index 409005bae9..cdf3fdce1a 100644
--- a/src/include/optimizer/optimizer.h
+++ b/src/include/optimizer/optimizer.h
@@ -88,6 +88,7 @@ extern PGDLLIMPORT double parallel_tuple_cost;
extern PGDLLIMPORT double parallel_setup_cost;
extern PGDLLIMPORT double recursive_worktable_factor;
extern PGDLLIMPORT int effective_cache_size;
+extern PGDLLIMPORT bool optimize_correlated_subqueries;
extern double clamp_row_est(double nrows);
extern long clamp_cardinality_to_long(Cardinality x);
diff --git a/src/include/optimizer/subselect.h
b/src/include/optimizer/subselect.h
index 456d3076e0..4e126e45ee 100644
--- a/src/include/optimizer/subselect.h
+++ b/src/include/optimizer/subselect.h
@@ -19,7 +19,8 @@
extern void SS_process_ctes(PlannerInfo *root);
extern JoinExpr *convert_ANY_sublink_to_join(PlannerInfo *root,
SubLink *sublink,
-
Relids available_rels);
+
Relids available_rels,
+
JoinExpr *lowest_outer_join);
extern JoinExpr *convert_EXISTS_sublink_to_join(PlannerInfo *root,
SubLink *sublink,
bool under_not,
diff --git a/src/include/optimizer/tlist.h b/src/include/optimizer/tlist.h
index 04668ba1c0..7627e7f679 100644
--- a/src/include/optimizer/tlist.h
+++ b/src/include/optimizer/tlist.h
@@ -18,6 +18,7 @@
extern TargetEntry *tlist_member(Expr *node, List *targetlist);
+extern TargetEntry *tlist_member_match_var(Var *var, List *targetlist);
extern List *add_to_flat_tlist(List *tlist, List *exprs);
diff --git a/src/test/regress/expected/prepare.out
b/src/test/regress/expected/prepare.out
index 5815e17b39..749b3faf64 100644
--- a/src/test/regress/expected/prepare.out
+++ b/src/test/regress/expected/prepare.out
@@ -184,6 +184,24 @@ SELECT name, statement, parameter_types, result_types FROM
pg_prepared_statement
| UPDATE tenk1 SET stringu1 = $2 WHERE unique1 = $1; |
|
(6 rows)
+-- The optimization on unnesting of correlated subqueries should work
+PREPARE q9(name,int) AS
+ SELECT * FROM tenk1 upper WHERE unique1 IN (
+ SELECT sub.unique2 FROM tenk1 sub
+ WHERE sub.stringu1 = $1 AND sub.unique1 = upper.unique2 + $2);
+EXPLAIN (COSTS OFF) EXECUTE q9('abc',2);
+ QUERY PLAN
+-----------------------------------------------------
+ Nested Loop
+ -> HashAggregate
+ Group Key: sub.unique2, sub.unique1
+ -> Seq Scan on tenk1 sub
+ Filter: (stringu1 = 'abc'::name)
+ -> Index Scan using tenk1_unique1 on tenk1 upper
+ Index Cond: (unique1 = sub.unique2)
+ Filter: (sub.unique1 = (unique2 + 2))
+(8 rows)
+
-- test DEALLOCATE ALL;
DEALLOCATE ALL;
SELECT name, statement, parameter_types FROM pg_prepared_statements
diff --git a/src/test/regress/expected/subselect.out
b/src/test/regress/expected/subselect.out
index 63d26d44fc..1523770984 100644
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -164,6 +164,35 @@ SELECT f1 AS "Correlated Field", f2 AS "Second Field"
3 | 3
(6 rows)
+EXPLAIN (COSTS OFF) SELECT f1 AS "Correlated Field", f3 AS "Second Field"
+ FROM SUBSELECT_TBL upper
+ WHERE f1 IN
+ (SELECT f2 FROM SUBSELECT_TBL WHERE CAST(upper.f2 AS float) = f3);
+ QUERY PLAN
+----------------------------------------------------------------------------------------------------
+ Hash Join
+ Hash Cond: ((upper.f1 = subselect_tbl.f2) AND ((upper.f2)::double precision
= subselect_tbl.f3))
+ -> Seq Scan on subselect_tbl upper
+ -> Hash
+ -> HashAggregate
+ Group Key: subselect_tbl.f2, subselect_tbl.f3
+ -> Seq Scan on subselect_tbl
+(7 rows)
+
+-- Still doesn't work for NOT IN
+EXPLAIN (COSTS OFF) SELECT f1 AS "Correlated Field", f3 AS "Second Field"
+ FROM SUBSELECT_TBL upper
+ WHERE f1 NOT IN
+ (SELECT f2 FROM SUBSELECT_TBL WHERE CAST(upper.f2 AS float) = f3);
+ QUERY PLAN
+-------------------------------------------------------
+ Seq Scan on subselect_tbl upper
+ Filter: (NOT (SubPlan 1))
+ SubPlan 1
+ -> Seq Scan on subselect_tbl
+ Filter: ((upper.f2)::double precision = f3)
+(5 rows)
+
SELECT f1 AS "Correlated Field", f3 AS "Second Field"
FROM SUBSELECT_TBL upper
WHERE f1 IN
@@ -177,6 +206,415 @@ SELECT f1 AS "Correlated Field", f3 AS "Second Field"
3 | 3
(5 rows)
+-- Constraints, imposed by LATERAL references, prohibit flattening of
underlying
+-- Sublink.
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM SUBSELECT_TBL a
+ WHERE EXISTS (SELECT f2 FROM SUBSELECT_TBL b WHERE f3 IN (
+ SELECT f3 FROM SUBSELECT_TBL c WHERE c.f1 = a.f2));
+ QUERY PLAN
+-----------------------------------------------
+ Aggregate
+ -> Nested Loop Semi Join
+ Join Filter: (SubPlan 1)
+ -> Seq Scan on subselect_tbl a
+ -> Materialize
+ -> Seq Scan on subselect_tbl b
+ SubPlan 1
+ -> Seq Scan on subselect_tbl c
+ Filter: (f1 = a.f2)
+(9 rows)
+
+SELECT count(*) FROM SUBSELECT_TBL a
+ WHERE EXISTS (SELECT f2 FROM SUBSELECT_TBL b WHERE f3 IN (
+ SELECT f3 FROM SUBSELECT_TBL c WHERE c.f1 = a.f2));
+ count
+-------
+ 5
+(1 row)
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM SUBSELECT_TBL a
+ WHERE NOT EXISTS (SELECT f2 FROM SUBSELECT_TBL b WHERE f3 IN (
+ SELECT f3 FROM SUBSELECT_TBL c WHERE c.f1 = a.f2));
+ QUERY PLAN
+-----------------------------------------------
+ Aggregate
+ -> Nested Loop Anti Join
+ Join Filter: (SubPlan 1)
+ -> Seq Scan on subselect_tbl a
+ -> Materialize
+ -> Seq Scan on subselect_tbl b
+ SubPlan 1
+ -> Seq Scan on subselect_tbl c
+ Filter: (f1 = a.f2)
+(9 rows)
+
+-- Prohibit to unnest subquery - quals contain lateral references to rels
+-- outside a higher outer join.
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM SUBSELECT_TBL a LEFT JOIN SUBSELECT_TBL b ON b.f1 IN (
+ SELECT c.f2 FROM SUBSELECT_TBL c WHERE c.f3 = a.f2);
+ QUERY PLAN
+---------------------------------------------------------
+ Aggregate
+ -> Nested Loop Left Join
+ Join Filter: (SubPlan 1)
+ -> Seq Scan on subselect_tbl a
+ -> Materialize
+ -> Seq Scan on subselect_tbl b
+ SubPlan 1
+ -> Seq Scan on subselect_tbl c
+ Filter: (f3 = (a.f2)::double precision)
+(9 rows)
+
+SELECT count(*) FROM SUBSELECT_TBL a LEFT JOIN SUBSELECT_TBL b ON b.f1 IN (
+ SELECT c.f2 FROM SUBSELECT_TBL c WHERE c.f3 = a.f2);
+ count
+-------
+ 18
+(1 row)
+
+EXPLAIN (COSTS OFF)
+ SELECT * FROM subselect_tbl a
+ WHERE a.f1 IN
+ (SELECT b.f2 FROM subselect_tbl b WHERE b.f2 = a.f1)
+; -- Optimizer removes excess clause
+ QUERY PLAN
+-----------------------------------------------
+ Hash Join
+ Hash Cond: (a.f1 = b.f2)
+ -> Seq Scan on subselect_tbl a
+ -> Hash
+ -> HashAggregate
+ Group Key: b.f2, b.f2
+ -> Seq Scan on subselect_tbl b
+(7 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM subselect_tbl a
+WHERE a.f1 IN (SELECT b.f2*b.f1/b.f3+2 FROM subselect_tbl b WHERE b.f3 = a.f2)
+; -- a bit more complex targetlist expression shouldn't cut off the
optimization
+ QUERY
PLAN
+--------------------------------------------------------------------------------------------------------------------------------------------------------
+ Hash Join
+ Hash Cond: (((a.f1)::double precision = ((((b.f2 * b.f1))::double precision
/ b.f3) + '2'::double precision)) AND ((a.f2)::double precision = b.f3))
+ -> Seq Scan on subselect_tbl a
+ -> Hash
+ -> HashAggregate
+ Group Key: ((((b.f2 * b.f1))::double precision / b.f3) +
'2'::double precision), b.f3
+ -> Seq Scan on subselect_tbl b
+(7 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM subselect_tbl a
+WHERE (a.f1,a.f3) IN (SELECT b.f2,b.f1 FROM subselect_tbl b WHERE b.f3 = a.f2)
+; -- Two variables in a target list
+ QUERY PLAN
+----------------------------------------------------------------------------------------------------------
+ Hash Join
+ Hash Cond: ((a.f1 = b.f2) AND (a.f3 = (b.f1)::double precision) AND
((a.f2)::double precision = b.f3))
+ -> Seq Scan on subselect_tbl a
+ -> Hash
+ -> HashAggregate
+ Group Key: b.f2, (b.f1)::double precision, b.f3
+ -> Seq Scan on subselect_tbl b
+(7 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM subselect_tbl a
+WHERE (a.f1,a.f3) IN (SELECT b.f2,b.f1*2 FROM subselect_tbl b WHERE b.f3 =
a.f2)
+; -- Expression as an element of composite type shouldn't cut off the
optimization
+ QUERY PLAN
+----------------------------------------------------------------------------------------------------------------
+ Hash Join
+ Hash Cond: ((a.f1 = b.f2) AND (a.f3 = ((b.f1 * 2))::double precision) AND
((a.f2)::double precision = b.f3))
+ -> Seq Scan on subselect_tbl a
+ -> Hash
+ -> HashAggregate
+ Group Key: b.f2, ((b.f1 * 2))::double precision, b.f3
+ -> Seq Scan on subselect_tbl b
+(7 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM subselect_tbl a
+WHERE a.f1 IN (SELECT b.f2 FROM subselect_tbl b WHERE b.f3 = a.f2 AND a.f1 =
b.f3 AND b.f3 <> 12)
+; -- Two expressions with correlated variables
+ QUERY PLAN
+-------------------------------------------------------------------------
+ Hash Join
+ Hash Cond: ((b.f2 = a.f1) AND (b.f3 = (a.f2)::double precision))
+ -> HashAggregate
+ Group Key: b.f2, b.f3, b.f3
+ -> Seq Scan on subselect_tbl b
+ Filter: (f3 <> '12'::double precision)
+ -> Hash
+ -> Seq Scan on subselect_tbl a
+ Filter: ((f2)::double precision = (f1)::double precision)
+(9 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM subselect_tbl a
+WHERE a.f1 IN (SELECT b.f2 FROM subselect_tbl b WHERE b.f3 = a.f2 AND a.f2 =
b.f3 AND b.f1 < 12)
+; -- Two expressions with correlated variables relates on one upper variable.
+ QUERY PLAN
+--------------------------------------------------------------------
+ Hash Join
+ Hash Cond: ((a.f1 = b.f2) AND ((a.f2)::double precision = b.f3))
+ -> Seq Scan on subselect_tbl a
+ -> Hash
+ -> HashAggregate
+ Group Key: b.f2, b.f3, b.f3
+ -> Seq Scan on subselect_tbl b
+ Filter: (f1 < 12)
+(8 rows)
+
+EXPLAIN (COSTS OFF)
+ SELECT * FROM subselect_tbl a
+ WHERE a.f1 IN (
+ SELECT b.f2 FROM subselect_tbl b WHERE b.f2 = a.f2 AND b.f2 < 12
+ GROUP BY (b.f2)
+ )
+; -- Pull clauses without unnesting the query. XXX: It reduces performance in
most use cases, doesn't it?
+ QUERY PLAN
+-----------------------------------------
+ Hash Join
+ Hash Cond: (b.f2 = a.f1)
+ -> HashAggregate
+ Group Key: b.f2
+ -> Seq Scan on subselect_tbl b
+ Filter: (f2 < 12)
+ -> Hash
+ -> Seq Scan on subselect_tbl a
+ Filter: (f1 = f2)
+(9 rows)
+
+EXPLAIN (COSTS OFF)
+ SELECT * FROM subselect_tbl a
+ WHERE a.f1 IN (
+ SELECT b.f2 FROM (
+ SELECT x AS f1, x+1 AS f2, x+2 AS f3 FROM generate_series(1,10) x
+ ) b WHERE b.f2 = a.f2 AND b.f1 BETWEEN 12 AND 14
+ UNION ALL
+ SELECT c.f1 FROM subselect_tbl c
+ WHERE c.f2 = a.f2 AND c.f1 BETWEEN 12 AND 14
+ )
+; -- Disallow flattening of union all
+ QUERY PLAN
+------------------------------------------------------------------------
+ Seq Scan on subselect_tbl a
+ Filter: (SubPlan 1)
+ SubPlan 1
+ -> Append
+ -> Function Scan on generate_series x
+ Filter: ((x >= 12) AND (x <= 14) AND ((x + 1) = a.f2))
+ -> Seq Scan on subselect_tbl c
+ Filter: ((f1 >= 12) AND (f1 <= 14) AND (f2 = a.f2))
+(8 rows)
+
+EXPLAIN (COSTS OFF)
+ SELECT * FROM subselect_tbl a
+ WHERE a.f1 IN (
+ SELECT b.f1 FROM subselect_tbl b JOIN subselect_tbl c ON (b.f1 = c.f2)
+ WHERE c.f2 = a.f2 AND c.f1 BETWEEN 12 AND 14
+ )
+; -- XXX: Could we flatten such subquery?
+ QUERY PLAN
+---------------------------------------------------------------------
+ Seq Scan on subselect_tbl a
+ Filter: (SubPlan 1)
+ SubPlan 1
+ -> Nested Loop
+ -> Seq Scan on subselect_tbl c
+ Filter: ((f1 >= 12) AND (f1 <= 14) AND (f2 = a.f2))
+ -> Seq Scan on subselect_tbl b
+ Filter: (f1 = a.f2)
+(8 rows)
+
+EXPLAIN (COSTS OFF)
+ SELECT * FROM subselect_tbl a
+ WHERE a.f1 IN (
+ SELECT b.f1 FROM subselect_tbl b, subselect_tbl c
+ WHERE b.f1 = c.f2 AND c.f2 = a.f2 AND c.f1 IS NOT NULL
+ )
+; -- TODO: Could we flatten such subquery?
+ QUERY PLAN
+------------------------------------------------------------------
+ Seq Scan on subselect_tbl a
+ Filter: (SubPlan 1)
+ SubPlan 1
+ -> Nested Loop
+ -> Seq Scan on subselect_tbl b
+ Filter: (f1 = a.f2)
+ -> Materialize
+ -> Seq Scan on subselect_tbl c
+ Filter: ((f1 IS NOT NULL) AND (f2 = a.f2))
+(9 rows)
+
+EXPLAIN (COSTS OFF)
+ SELECT * FROM subselect_tbl a
+ WHERE (a.f1,f2) IN (
+ SELECT b.f2, avg(f3) FROM subselect_tbl b WHERE b.f2 = a.f2 AND b.f2 < 12
+ GROUP BY (b.f2)
+ )
+; -- Doesn't support unnesting with aggregate functions
+ QUERY PLAN
+-----------------------------------------------------
+ Seq Scan on subselect_tbl a
+ Filter: (SubPlan 1)
+ SubPlan 1
+ -> GroupAggregate
+ Group Key: b.f2
+ -> Seq Scan on subselect_tbl b
+ Filter: ((f2 < 12) AND (f2 = a.f2))
+(7 rows)
+
+EXPLAIN (COSTS OFF)
+ SELECT * FROM subselect_tbl a
+ WHERE a.f1 IN (
+ WITH cte AS (
+ SELECT * FROM subselect_tbl c WHERE f1 < 42 AND f2 = a.f1
+ )
+ SELECT b.f2 FROM cte b WHERE b.f2 = a.f2 AND b.f2 < 12
+ )
+; -- Give up optimization if CTE in subquery contains links to upper relation.
+ QUERY PLAN
+-------------------------------------------------------------------
+ Seq Scan on subselect_tbl a
+ Filter: (SubPlan 1)
+ SubPlan 1
+ -> Result
+ One-Time Filter: (a.f1 = a.f2)
+ -> Seq Scan on subselect_tbl c
+ Filter: ((f1 < 42) AND (f2 < 12) AND (f2 = a.f2))
+(7 rows)
+
+EXPLAIN (COSTS OFF)
+ SELECT * FROM subselect_tbl a
+ WHERE a.f1 IN (
+ WITH cte AS (
+ SELECT * FROM subselect_tbl c WHERE f1 < 42
+ )
+ SELECT b.f2 FROM cte b WHERE b.f2 = a.f2 AND b.f2 < 12
+ )
+; -- Correlated subquery with trivial CTE can be pulled up
+ QUERY PLAN
+-------------------------------------------------
+ Hash Semi Join
+ Hash Cond: (a.f1 = c.f2)
+ -> Seq Scan on subselect_tbl a
+ Filter: (f1 = f2)
+ -> Hash
+ -> Seq Scan on subselect_tbl c
+ Filter: ((f1 < 42) AND (f2 < 12))
+(7 rows)
+
+EXPLAIN (COSTS OFF)
+ SELECT * FROM subselect_tbl a
+ WHERE (a.f1,a.f3) IN (
+ SELECT b.f2, avg(b.f3) OVER (PARTITION BY b.f2)
+ FROM subselect_tbl b WHERE b.f2 = a.f2 AND b.f2 < 12
+ )
+; -- Doesn't support unnesting with window functions in target list
+ QUERY PLAN
+-----------------------------------------------------
+ Seq Scan on subselect_tbl a
+ Filter: (SubPlan 1)
+ SubPlan 1
+ -> WindowAgg
+ -> Seq Scan on subselect_tbl b
+ Filter: ((f2 < 12) AND (f2 = a.f2))
+(6 rows)
+
+-- A having qual, group clause and so on, with links to upper relation variable
+-- cut off the optimization because another case we must rewrite the subquery
+-- as a lateral TargetEntry and arrange these links.
+-- But now, machinery of convert_ANY_sublink_to_join() isn't prepared for such
+-- complex work and it would induce additional complex code.
+EXPLAIN (COSTS OFF)
+ SELECT * FROM subselect_tbl a
+ WHERE a.f1 IN (
+ SELECT b.f2
+ FROM subselect_tbl b WHERE b.f2 = a.f2 AND b.f2 < 12
+ GROUP BY (b.f2) HAVING b.f2 > a.f3
+ )
+;
+ QUERY PLAN
+-----------------------------------------------------------------------------------------
+ Seq Scan on subselect_tbl a
+ Filter: (SubPlan 1)
+ SubPlan 1
+ -> Group
+ Group Key: b.f2
+ -> Seq Scan on subselect_tbl b
+ Filter: ((f2 < 12) AND (f2 = a.f2) AND ((f2)::double
precision > a.f3))
+(7 rows)
+
+EXPLAIN (COSTS OFF)
+ SELECT * FROM subselect_tbl a
+ WHERE a.f1 IN (
+ SELECT b.f2 FROM (
+ SELECT x AS f1, x+1 AS f2, x+2 AS f3 FROM generate_series(1,a.f1) x
+ ) b WHERE b.f2 = a.f2 AND b.f1 < 12
+ )
+; -- Don't allow links to upper query in FROM section of subquery
+ QUERY PLAN
+---------------------------------------------------
+ Seq Scan on subselect_tbl a
+ Filter: (SubPlan 1)
+ SubPlan 1
+ -> Function Scan on generate_series x
+ Filter: ((x < 12) AND ((x + 1) = a.f2))
+(5 rows)
+
+EXPLAIN (COSTS OFF)
+ SELECT * FROM subselect_tbl a
+ WHERE a.f1 IN (
+ SELECT a.f1 FROM subselect_tbl b WHERE b.f2 = a.f2 AND b.f2 < 12
+ GROUP BY (a.f1)
+ )
+; -- GROUP BY contains link to upper relation
+ QUERY PLAN
+-----------------------------------------------------
+ Seq Scan on subselect_tbl a
+ Filter: (SubPlan 1)
+ SubPlan 1
+ -> Group
+ Group Key: a.f1
+ -> Seq Scan on subselect_tbl b
+ Filter: ((f2 < 12) AND (f2 = a.f2))
+(7 rows)
+
+-- Flatten subquery with not-correlated clauses. The same result set returned
+EXPLAIN (COSTS OFF) SELECT f1 AS "Correlated Field", f3 AS "Second Field"
+ FROM SUBSELECT_TBL upper
+ WHERE f1 IN
+ (SELECT f2 FROM SUBSELECT_TBL WHERE CAST(upper.f2 AS float) = f3 AND f2 <>
42);
+ QUERY PLAN
+----------------------------------------------------------------------------------------------------
+ Hash Join
+ Hash Cond: ((upper.f1 = subselect_tbl.f2) AND ((upper.f2)::double precision
= subselect_tbl.f3))
+ -> Seq Scan on subselect_tbl upper
+ -> Hash
+ -> HashAggregate
+ Group Key: subselect_tbl.f2, subselect_tbl.f3
+ -> Seq Scan on subselect_tbl
+ Filter: (f2 <> 42)
+(8 rows)
+
+SELECT f1 AS "Correlated Field", f3 AS "Second Field"
+ FROM SUBSELECT_TBL upper
+ WHERE f1 IN
+ (SELECT f2 FROM SUBSELECT_TBL WHERE CAST(upper.f2 AS float) = f3 AND f2 <>
42);
+ Correlated Field | Second Field
+------------------+--------------
+ 2 | 4
+ 3 | 5
+ 1 | 1
+ 2 | 2
+ 3 | 3
+(5 rows)
+
SELECT f1 AS "Correlated Field", f3 AS "Second Field"
FROM SUBSELECT_TBL upper
WHERE f3 IN (SELECT upper.f1 + f2 FROM SUBSELECT_TBL
diff --git a/src/test/regress/sql/prepare.sql b/src/test/regress/sql/prepare.sql
index c6098dc95c..8a8164ee99 100644
--- a/src/test/regress/sql/prepare.sql
+++ b/src/test/regress/sql/prepare.sql
@@ -78,6 +78,13 @@ PREPARE q8 AS
SELECT name, statement, parameter_types, result_types FROM
pg_prepared_statements
ORDER BY name;
+-- The optimization on unnesting of correlated subqueries should work
+PREPARE q9(name,int) AS
+ SELECT * FROM tenk1 upper WHERE unique1 IN (
+ SELECT sub.unique2 FROM tenk1 sub
+ WHERE sub.stringu1 = $1 AND sub.unique1 = upper.unique2 + $2);
+EXPLAIN (COSTS OFF) EXECUTE q9('abc',2);
+
-- test DEALLOCATE ALL;
DEALLOCATE ALL;
SELECT name, statement, parameter_types FROM pg_prepared_statements
diff --git a/src/test/regress/sql/subselect.sql
b/src/test/regress/sql/subselect.sql
index 40276708c9..40ed61d508 100644
--- a/src/test/regress/sql/subselect.sql
+++ b/src/test/regress/sql/subselect.sql
@@ -67,11 +67,173 @@ SELECT f1 AS "Correlated Field", f2 AS "Second Field"
FROM SUBSELECT_TBL upper
WHERE f1 IN (SELECT f2 FROM SUBSELECT_TBL WHERE f1 = upper.f1);
+EXPLAIN (COSTS OFF) SELECT f1 AS "Correlated Field", f3 AS "Second Field"
+ FROM SUBSELECT_TBL upper
+ WHERE f1 IN
+ (SELECT f2 FROM SUBSELECT_TBL WHERE CAST(upper.f2 AS float) = f3);
+-- Still doesn't work for NOT IN
+EXPLAIN (COSTS OFF) SELECT f1 AS "Correlated Field", f3 AS "Second Field"
+ FROM SUBSELECT_TBL upper
+ WHERE f1 NOT IN
+ (SELECT f2 FROM SUBSELECT_TBL WHERE CAST(upper.f2 AS float) = f3);
+
SELECT f1 AS "Correlated Field", f3 AS "Second Field"
FROM SUBSELECT_TBL upper
WHERE f1 IN
(SELECT f2 FROM SUBSELECT_TBL WHERE CAST(upper.f2 AS float) = f3);
+-- Constraints, imposed by LATERAL references, prohibit flattening of
underlying
+-- Sublink.
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM SUBSELECT_TBL a
+ WHERE EXISTS (SELECT f2 FROM SUBSELECT_TBL b WHERE f3 IN (
+ SELECT f3 FROM SUBSELECT_TBL c WHERE c.f1 = a.f2));
+SELECT count(*) FROM SUBSELECT_TBL a
+ WHERE EXISTS (SELECT f2 FROM SUBSELECT_TBL b WHERE f3 IN (
+ SELECT f3 FROM SUBSELECT_TBL c WHERE c.f1 = a.f2));
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM SUBSELECT_TBL a
+ WHERE NOT EXISTS (SELECT f2 FROM SUBSELECT_TBL b WHERE f3 IN (
+ SELECT f3 FROM SUBSELECT_TBL c WHERE c.f1 = a.f2));
+
+-- Prohibit to unnest subquery - quals contain lateral references to rels
+-- outside a higher outer join.
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM SUBSELECT_TBL a LEFT JOIN SUBSELECT_TBL b ON b.f1 IN (
+ SELECT c.f2 FROM SUBSELECT_TBL c WHERE c.f3 = a.f2);
+SELECT count(*) FROM SUBSELECT_TBL a LEFT JOIN SUBSELECT_TBL b ON b.f1 IN (
+ SELECT c.f2 FROM SUBSELECT_TBL c WHERE c.f3 = a.f2);
+
+EXPLAIN (COSTS OFF)
+ SELECT * FROM subselect_tbl a
+ WHERE a.f1 IN
+ (SELECT b.f2 FROM subselect_tbl b WHERE b.f2 = a.f1)
+; -- Optimizer removes excess clause
+EXPLAIN (COSTS OFF)
+SELECT * FROM subselect_tbl a
+WHERE a.f1 IN (SELECT b.f2*b.f1/b.f3+2 FROM subselect_tbl b WHERE b.f3 = a.f2)
+; -- a bit more complex targetlist expression shouldn't cut off the
optimization
+EXPLAIN (COSTS OFF)
+SELECT * FROM subselect_tbl a
+WHERE (a.f1,a.f3) IN (SELECT b.f2,b.f1 FROM subselect_tbl b WHERE b.f3 = a.f2)
+; -- Two variables in a target list
+EXPLAIN (COSTS OFF)
+SELECT * FROM subselect_tbl a
+WHERE (a.f1,a.f3) IN (SELECT b.f2,b.f1*2 FROM subselect_tbl b WHERE b.f3 =
a.f2)
+; -- Expression as an element of composite type shouldn't cut off the
optimization
+EXPLAIN (COSTS OFF)
+SELECT * FROM subselect_tbl a
+WHERE a.f1 IN (SELECT b.f2 FROM subselect_tbl b WHERE b.f3 = a.f2 AND a.f1 =
b.f3 AND b.f3 <> 12)
+; -- Two expressions with correlated variables
+EXPLAIN (COSTS OFF)
+SELECT * FROM subselect_tbl a
+WHERE a.f1 IN (SELECT b.f2 FROM subselect_tbl b WHERE b.f3 = a.f2 AND a.f2 =
b.f3 AND b.f1 < 12)
+; -- Two expressions with correlated variables relates on one upper variable.
+EXPLAIN (COSTS OFF)
+ SELECT * FROM subselect_tbl a
+ WHERE a.f1 IN (
+ SELECT b.f2 FROM subselect_tbl b WHERE b.f2 = a.f2 AND b.f2 < 12
+ GROUP BY (b.f2)
+ )
+; -- Pull clauses without unnesting the query. XXX: It reduces performance in
most use cases, doesn't it?
+
+EXPLAIN (COSTS OFF)
+ SELECT * FROM subselect_tbl a
+ WHERE a.f1 IN (
+ SELECT b.f2 FROM (
+ SELECT x AS f1, x+1 AS f2, x+2 AS f3 FROM generate_series(1,10) x
+ ) b WHERE b.f2 = a.f2 AND b.f1 BETWEEN 12 AND 14
+ UNION ALL
+ SELECT c.f1 FROM subselect_tbl c
+ WHERE c.f2 = a.f2 AND c.f1 BETWEEN 12 AND 14
+ )
+; -- Disallow flattening of union all
+EXPLAIN (COSTS OFF)
+ SELECT * FROM subselect_tbl a
+ WHERE a.f1 IN (
+ SELECT b.f1 FROM subselect_tbl b JOIN subselect_tbl c ON (b.f1 = c.f2)
+ WHERE c.f2 = a.f2 AND c.f1 BETWEEN 12 AND 14
+ )
+; -- XXX: Could we flatten such subquery?
+EXPLAIN (COSTS OFF)
+ SELECT * FROM subselect_tbl a
+ WHERE a.f1 IN (
+ SELECT b.f1 FROM subselect_tbl b, subselect_tbl c
+ WHERE b.f1 = c.f2 AND c.f2 = a.f2 AND c.f1 IS NOT NULL
+ )
+; -- TODO: Could we flatten such subquery?
+EXPLAIN (COSTS OFF)
+ SELECT * FROM subselect_tbl a
+ WHERE (a.f1,f2) IN (
+ SELECT b.f2, avg(f3) FROM subselect_tbl b WHERE b.f2 = a.f2 AND b.f2 < 12
+ GROUP BY (b.f2)
+ )
+; -- Doesn't support unnesting with aggregate functions
+EXPLAIN (COSTS OFF)
+ SELECT * FROM subselect_tbl a
+ WHERE a.f1 IN (
+ WITH cte AS (
+ SELECT * FROM subselect_tbl c WHERE f1 < 42 AND f2 = a.f1
+ )
+ SELECT b.f2 FROM cte b WHERE b.f2 = a.f2 AND b.f2 < 12
+ )
+; -- Give up optimization if CTE in subquery contains links to upper relation.
+EXPLAIN (COSTS OFF)
+ SELECT * FROM subselect_tbl a
+ WHERE a.f1 IN (
+ WITH cte AS (
+ SELECT * FROM subselect_tbl c WHERE f1 < 42
+ )
+ SELECT b.f2 FROM cte b WHERE b.f2 = a.f2 AND b.f2 < 12
+ )
+; -- Correlated subquery with trivial CTE can be pulled up
+EXPLAIN (COSTS OFF)
+ SELECT * FROM subselect_tbl a
+ WHERE (a.f1,a.f3) IN (
+ SELECT b.f2, avg(b.f3) OVER (PARTITION BY b.f2)
+ FROM subselect_tbl b WHERE b.f2 = a.f2 AND b.f2 < 12
+ )
+; -- Doesn't support unnesting with window functions in target list
+
+-- A having qual, group clause and so on, with links to upper relation variable
+-- cut off the optimization because another case we must rewrite the subquery
+-- as a lateral TargetEntry and arrange these links.
+-- But now, machinery of convert_ANY_sublink_to_join() isn't prepared for such
+-- complex work and it would induce additional complex code.
+EXPLAIN (COSTS OFF)
+ SELECT * FROM subselect_tbl a
+ WHERE a.f1 IN (
+ SELECT b.f2
+ FROM subselect_tbl b WHERE b.f2 = a.f2 AND b.f2 < 12
+ GROUP BY (b.f2) HAVING b.f2 > a.f3
+ )
+;
+EXPLAIN (COSTS OFF)
+ SELECT * FROM subselect_tbl a
+ WHERE a.f1 IN (
+ SELECT b.f2 FROM (
+ SELECT x AS f1, x+1 AS f2, x+2 AS f3 FROM generate_series(1,a.f1) x
+ ) b WHERE b.f2 = a.f2 AND b.f1 < 12
+ )
+; -- Don't allow links to upper query in FROM section of subquery
+EXPLAIN (COSTS OFF)
+ SELECT * FROM subselect_tbl a
+ WHERE a.f1 IN (
+ SELECT a.f1 FROM subselect_tbl b WHERE b.f2 = a.f2 AND b.f2 < 12
+ GROUP BY (a.f1)
+ )
+; -- GROUP BY contains link to upper relation
+
+-- Flatten subquery with not-correlated clauses. The same result set returned
+EXPLAIN (COSTS OFF) SELECT f1 AS "Correlated Field", f3 AS "Second Field"
+ FROM SUBSELECT_TBL upper
+ WHERE f1 IN
+ (SELECT f2 FROM SUBSELECT_TBL WHERE CAST(upper.f2 AS float) = f3 AND f2 <>
42);
+SELECT f1 AS "Correlated Field", f3 AS "Second Field"
+ FROM SUBSELECT_TBL upper
+ WHERE f1 IN
+ (SELECT f2 FROM SUBSELECT_TBL WHERE CAST(upper.f2 AS float) = f3 AND f2 <>
42);
+
SELECT f1 AS "Correlated Field", f3 AS "Second Field"
FROM SUBSELECT_TBL upper
WHERE f3 IN (SELECT upper.f1 + f2 FROM SUBSELECT_TBL
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 97c9bc1861..4803e7a978 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -470,6 +470,7 @@ CopySource
CopyStmt
CopyToState
CopyToStateData
+correlated_t
Cost
CostSelector
Counters
--
2.37.3