On 5/10/2022 02:45, Zhihong Yu wrote:
Hi,
For contain_placeholders():

+   if (IsA(node, Query))
+       return query_tree_walker((Query *) node, contain_placeholders, context, 0);
+   else if (IsA(node, PlaceHolderVar))
Fixed

The `else` is not needed.

For correlated_t struct, it would be better if the fields have comments.
Ok, I've added some comments.

+                    * (for grouping, as an example). So, revert its status to
+                    * a full valued entry.

full valued -> fully valued
Fixed

--
regards,
Andrey Lepikhov
Postgres Professional
From da570914e53bc34e6bf3291649725a041a8b929c Mon Sep 17 00:00:00 2001
From: "Andrey V. Lepikhov" <a.lepik...@postgrespro.ru>
Date: Wed, 28 Sep 2022 13:42:12 +0300
Subject: [PATCH] Transform correlated subquery of type N-J [1] into ordinary
 join query. With many restrictions, check each subquery and pull up
 expressions, references upper query block. Works for operators '=' and 'IN'.
 Machinery of converting ANY subquery to JOIN stays the same with minor
 changes in walker function.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Pass a lower outer join through the pullup_sublink recursive procedure to find
out some situations when qual references outer side of an outer join.

[1] Kim, Won. “On optimizing an SQL-like nested query.” ACM Trans. Database 
Syst. 7 (1982): 443-469.
---
 src/backend/optimizer/plan/subselect.c    | 328 +++++++++++++++-
 src/backend/optimizer/prep/prepjointree.c |  71 ++--
 src/backend/optimizer/util/tlist.c        |   2 +-
 src/backend/optimizer/util/var.c          |   8 +
 src/backend/utils/misc/guc_tables.c       |  10 +
 src/include/optimizer/optimizer.h         |   1 +
 src/include/optimizer/subselect.h         |   3 +-
 src/include/optimizer/tlist.h             |   1 +
 src/test/regress/expected/prepare.out     |  18 +
 src/test/regress/expected/subselect.out   | 438 ++++++++++++++++++++++
 src/test/regress/sql/prepare.sql          |   7 +
 src/test/regress/sql/subselect.sql        | 162 ++++++++
 src/tools/pgindent/typedefs.list          |   1 +
 13 files changed, 1013 insertions(+), 37 deletions(-)

diff --git a/src/backend/optimizer/plan/subselect.c 
b/src/backend/optimizer/plan/subselect.c
index 92e3338584..be19d85524 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -32,6 +32,7 @@
 #include "optimizer/planner.h"
 #include "optimizer/prep.h"
 #include "optimizer/subselect.h"
+#include "optimizer/tlist.h"
 #include "parser/parse_relation.h"
 #include "rewrite/rewriteManip.h"
 #include "utils/builtins.h"
@@ -65,6 +66,8 @@ typedef struct inline_cte_walker_context
 } inline_cte_walker_context;
 
 
+bool optimize_correlated_subqueries = true;
+
 static Node *build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot,
                                                   List *plan_params,
                                                   SubLinkType subLinkType, int 
subLinkId,
@@ -1229,6 +1232,296 @@ inline_cte_walker(Node *node, inline_cte_walker_context 
*context)
        return expression_tree_walker(node, inline_cte_walker, context);
 }
 
+static bool
+contain_placeholders(Node *node, inline_cte_walker_context *context)
+{
+       if (node == NULL)
+               return false;
+
+       if (IsA(node, Query))
+               return query_tree_walker((Query *) node, contain_placeholders, 
context, 0);
+       if (IsA(node, PlaceHolderVar))
+               return true;
+
+       return expression_tree_walker(node, contain_placeholders, context);
+}
+
+/*
+ * To be pullable all clauses of flattening correlated subquery should be
+ * anded and mergejoinable (XXX: really necessary?)
+ */
+static bool
+quals_is_pullable(Node *quals)
+{
+       if (!contain_vars_of_level(quals, 1))
+               return true;
+
+       if (quals && IsA(quals, OpExpr))
+       {
+               OpExpr *expr = (OpExpr *) quals;
+               Node   *leftarg;
+
+               /* Contains only one expression */
+               leftarg = linitial(expr->args);
+               if (!op_mergejoinable(expr->opno, exprType(leftarg))) /* Is it 
really necessary ? */
+                       return false;
+
+               if (contain_placeholders(quals, NULL))
+                       return false;
+
+               return true;
+       }
+       else if (is_andclause(quals))
+       {
+               ListCell   *l;
+
+               foreach(l, ((BoolExpr *) quals)->args)
+               {
+                       Node *andarg = (Node *) lfirst(l);
+
+                       if (!IsA(andarg, OpExpr))
+                               return false;
+                       if (!quals_is_pullable(andarg))
+                               return false;
+               }
+
+               return true;
+       }
+
+       return false;
+}
+
+/*
+ * Mutator context used in pull-up process of expressions from WHERE quals.
+ */
+typedef struct
+{
+       Query  *subquery; /* Link to the subquery which we are trying to pull 
up */
+       List   *pulling_quals; /* List of expressions contained pulled 
expressions */
+       int             newvarno; /* Index or range table entry which is a 
source of pulling
+                                                varno */
+       bool    varlevel_up; /* true if a pulling expression is being processed,
+                                                       false otherwise. */
+} correlated_t;
+
+/*
+ * Pull up expressions, containing a link to upper relation. In the qual it 
will
+ * be replaced by TRUE const. In this expression, all links to upper levels 
will
+ * be arranged by one level.
+ */
+static Node *
+pull_subquery_clauses_mutator(Node *node, correlated_t *ctx)
+{
+       if (node == NULL)
+               return NULL;
+
+       if (IsA(node, OpExpr) && !ctx->varlevel_up)
+       {
+               if (!contain_vars_of_level(node, 1))
+                       return node;
+
+               /*
+                * The expression contains links to upper relation. It will be 
pulled to
+                * uplevel. All links into vars of upper levels must be changed.
+                */
+
+               ctx->varlevel_up = true;
+               ctx->pulling_quals =
+                       lappend(ctx->pulling_quals,
+                                       expression_tree_mutator(node,
+                                                                               
        pull_subquery_clauses_mutator,
+                                                                               
        (void *) ctx));
+               ctx->varlevel_up = false;
+
+               /* Replace position of pulled expression by the 'true' value */
+               return makeBoolConst(true, false);
+       }
+       if (IsA(node, PlaceHolderVar))
+       {
+               PlaceHolderVar *phv = (PlaceHolderVar *) node;
+
+               if (ctx->varlevel_up && phv->phlevelsup > 0)
+                       phv->phlevelsup--;
+               /* fall through to recurse into argument */
+       }
+       else if (IsA(node, RangeTblEntry))
+       {
+               RangeTblEntry *rte = (RangeTblEntry *) node;
+
+               if (rte->rtekind == RTE_CTE)
+               {
+                       if (rte->ctelevelsup > 0 && ctx->varlevel_up)
+                               rte->ctelevelsup--;
+               }
+               return node;
+       }
+       else if (IsA(node, Aggref))
+       {
+               if (((Aggref *) node)->agglevelsup > 0 && ctx->varlevel_up)
+                       ((Aggref *) node)->agglevelsup--;
+               return node;
+       }
+       else if (IsA(node, GroupingFunc))
+       {
+               if (((GroupingFunc *) node)->agglevelsup > 0 && 
ctx->varlevel_up)
+                       ((GroupingFunc *) node)->agglevelsup--;
+               return node;
+       }
+       else if (IsA(node, Var))
+       {
+               Var *var = (Var *) node;
+
+               Assert(ctx->varlevel_up);
+
+               /* An upper relation variable */
+               if (var->varlevelsup > 0)
+               {
+                       /*
+                        * Isn't needed to copy node or change varno because it 
correctly
+                        * refers to Table Entry of a parent and already 
removed from
+                        * the subquery clauses list.
+                        */
+                       var->varlevelsup--;
+
+                       return (Node *) var;
+               }
+               else
+               {
+                       Var                       *newvar;
+                       TargetEntry   *tle;
+
+                       /*
+                        * The var refers to subquery table entry. Include a 
copy the var
+                        * into the target list, if necessary. Arrange varattno 
of the
+                        * new var of upper relation with a link to this entry.
+                        */
+
+                       /* Create a var for usage in upper relation */
+                       newvar = (Var *) copyObject(node);
+
+                       /* Does the var already exists in the target list? */
+                       tle = tlist_member_match_var(var, 
ctx->subquery->targetList);
+
+                       if (tle == NULL)
+                       {
+                               int resno = 
list_length(ctx->subquery->targetList) + 1;
+
+                               /*
+                                * Target list of the subquery doesn't contain 
this var. Add it
+                                * into the end of the target list and correct 
the link
+                                * XXX: Maybe choose real colname here?
+                                */
+                               tle = makeTargetEntry((Expr *) var, resno, 
"rescol", false);
+                               ctx->subquery->targetList = 
lappend(ctx->subquery->targetList,
+                                                                               
                        tle);
+                       }
+                       else
+                       {
+                               if (tle->resjunk)
+                               {
+                                       /*
+                                        * Target entry exists but used as an 
utility entry
+                                        * (for grouping, as an example). So, 
revert its status to
+                                        * a fully valued entry.
+                                        */
+                                       tle->resjunk = false;
+                                       tle->resname = pstrdup("resjunkcol");
+                               }
+                       }
+
+                       /*
+                        * Set the new var to refer newly created RangeTblEntry 
in the upper
+                        * query and varattno to refer at specific position in 
the target
+                        * list.
+                        */
+                       newvar->varno = ctx->newvarno;
+                       newvar->varattno = tle->resno;
+
+                       return (Node *) newvar;
+               }
+       }
+       if (IsA(node, Query))
+               return (Node *) query_tree_mutator((Query *) node,
+                                                                               
   pull_subquery_clauses_mutator,
+                                                                               
   (void *) ctx, 0);
+
+       return expression_tree_mutator(node, pull_subquery_clauses_mutator,
+                                                                  (void *) 
ctx);
+}
+
+static List *
+pull_correlated_clauses(PlannerInfo *root, SubLink *sublink,
+                                               JoinExpr *lowest_outer_join)
+{
+       Query              *parse = root->parse;
+       Query              *subselect = (Query *) sublink->subselect;
+       FromExpr           *f;
+       correlated_t    ctx = {.subquery = subselect,
+                                                  .newvarno = 
list_length(parse->rtable) + 1, /* Looks like a hack */
+                                                  .pulling_quals = NIL,
+                                                  .varlevel_up = false};
+       Relids          safe_upper_varnos = NULL;
+
+       Assert(IsA(subselect, Query));
+
+       /* Use only for correlated candidates, just for optimal usage */
+       Assert(contain_vars_of_level((Node *) subselect, 1));
+
+       if (!optimize_correlated_subqueries ||
+               subselect->hasAggs ||
+               subselect->hasWindowFuncs ||
+               subselect->hasForUpdate || /* Pulling of clauses can change a 
number of tuples which subselect returns. */
+               subselect->hasRowSecurity /* Just because of paranoid safety */
+               )
+               /* The feature is switched off. */
+               return NULL;
+
+       /*
+        * We pull up quals and arrange variable levels for expressions in WHERE
+        * section only. So, cut the optimization off if an upper relation links
+        * from another parts of the subquery are detected.
+        */
+       if (contain_vars_of_level((Node *) subselect->cteList, 1) ||
+               /* see comments in subselect.sql */
+               contain_vars_of_level((Node *) subselect->rtable, 1) ||
+               contain_vars_of_level((Node *) subselect->targetList, 1) ||
+               contain_vars_of_level((Node *) subselect->returningList, 1) ||
+               contain_vars_of_level((Node *) subselect->groupingSets, 1) ||
+               contain_vars_of_level((Node *) subselect->distinctClause, 1) ||
+               contain_vars_of_level((Node *) subselect->sortClause, 1) ||
+               contain_vars_of_level((Node *) subselect->limitOffset, 1) ||
+               contain_vars_of_level((Node *) subselect->limitCount, 1) ||
+               contain_vars_of_level((Node *) subselect->rowMarks, 1) ||
+               contain_vars_of_level((Node *) subselect->havingQual, 1) ||
+               contain_vars_of_level((Node *) subselect->groupClause, 1))
+               return NULL;
+
+       f = subselect->jointree;
+
+       if (!f || !f->quals || !quals_is_pullable(f->quals))
+               return NULL;
+
+       if (lowest_outer_join)
+               safe_upper_varnos = get_relids_in_jointree(
+                               (lowest_outer_join->jointype == JOIN_RIGHT) ?
+                                       lowest_outer_join->larg : 
lowest_outer_join->rarg, true);
+
+       if (safe_upper_varnos &&
+               !bms_is_subset(pull_varnos_of_level(root, f->quals, 1),
+                                          safe_upper_varnos))
+               return NULL;
+
+       /*
+        * Now, is proved that it is possible to pull up expressions with 
variables
+        * from the upper query.
+        * Pull up quals, containing correlated expressions. Replace its
+        * positions with a true boolean expression.
+        * It would be removed on a next planning stage.
+        */
+       f->quals = pull_subquery_clauses_mutator(f->quals, (void *) &ctx);
+
+       return ctx.pulling_quals;
+}
 
 /*
  * convert_ANY_sublink_to_join: try to convert an ANY SubLink to a join
@@ -1266,7 +1559,7 @@ inline_cte_walker(Node *node, inline_cte_walker_context 
*context)
  */
 JoinExpr *
 convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink,
-                                                       Relids available_rels)
+                                                       Relids available_rels, 
JoinExpr *lowest_outer_join)
 {
        JoinExpr   *result;
        Query      *parse = root->parse;
@@ -1279,16 +1572,10 @@ convert_ANY_sublink_to_join(PlannerInfo *root, SubLink 
*sublink,
        List       *subquery_vars;
        Node       *quals;
        ParseState *pstate;
+       List       *pclauses = NIL;
 
        Assert(sublink->subLinkType == ANY_SUBLINK);
 
-       /*
-        * The sub-select must not refer to any Vars of the parent query. (Vars 
of
-        * higher levels should be okay, though.)
-        */
-       if (contain_vars_of_level((Node *) subselect, 1))
-               return NULL;
-
        /*
         * The test expression must contain some Vars of the parent query, else
         * it's not gonna be a join.  (Note that it won't have Vars referring to
@@ -1310,6 +1597,17 @@ convert_ANY_sublink_to_join(PlannerInfo *root, SubLink 
*sublink,
        if (contain_volatile_functions(sublink->testexpr))
                return NULL;
 
+       /*
+        * The sub-select must not refer to any Vars of the parent query. (Vars 
of
+        * higher levels should be okay, though.)
+        * In the case of correlated subquery, jointree quals structure will be
+        * modified: expressions with variables from upper query moves to the
+        * pulled_clauses list, their places in the quals replaces by "true" 
value.
+        */
+       if (contain_vars_of_level((Node *) subselect, 1) &&
+               (pclauses = pull_correlated_clauses(root, sublink, 
lowest_outer_join)) == NIL)
+               return NULL;
+
        /* Create a dummy ParseState for addRangeTableEntryForSubquery */
        pstate = make_parsestate(NULL);
 
@@ -1348,6 +1646,20 @@ convert_ANY_sublink_to_join(PlannerInfo *root, SubLink 
*sublink,
         */
        quals = convert_testexpr(root, sublink->testexpr, subquery_vars);
 
+       /* Nested subquery with references to upper level relation. */
+       if (pclauses != NIL)
+       {
+               /* Add clauses, pulled from subquery into WHERE section of the 
parent. */
+               if (IsA(quals, BoolExpr))
+               {
+                       BoolExpr *b = (BoolExpr *) quals;
+                       b->args = list_concat(b->args, pclauses);
+               }
+               else
+                       quals = (Node *) make_andclause(
+                                                                       
list_concat(list_make1(quals), pclauses));
+       }
+
        /*
         * And finally, build the JoinExpr node.
         */
diff --git a/src/backend/optimizer/prep/prepjointree.c 
b/src/backend/optimizer/prep/prepjointree.c
index 41c7066d90..f72b8b1320 100644
--- a/src/backend/optimizer/prep/prepjointree.c
+++ b/src/backend/optimizer/prep/prepjointree.c
@@ -62,10 +62,12 @@ typedef struct reduce_outer_joins_state
 } reduce_outer_joins_state;
 
 static Node *pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node *jtnode,
-                                                                               
           Relids *relids);
+                                                                               
           Relids *relids,
+                                                                               
           JoinExpr *lowest_outer_join);
 static Node *pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
                                                                                
   Node **jtlink1, Relids available_rels1,
-                                                                               
   Node **jtlink2, Relids available_rels2);
+                                                                               
   Node **jtlink2, Relids available_rels2,
+                                                                               
   JoinExpr *lowest_outer_join);
 static Node *pull_up_subqueries_recurse(PlannerInfo *root, Node *jtnode,
                                                                                
JoinExpr *lowest_outer_join,
                                                                                
JoinExpr *lowest_nulling_outer_join,
@@ -293,7 +295,7 @@ pull_up_sublinks(PlannerInfo *root)
        /* Begin recursion through the jointree */
        jtnode = pull_up_sublinks_jointree_recurse(root,
                                                                                
           (Node *) root->parse->jointree,
-                                                                               
           &relids);
+                                                                               
           &relids, NULL);
 
        /*
         * root->parse->jointree must always be a FromExpr, so insert a dummy 
one
@@ -313,7 +315,7 @@ pull_up_sublinks(PlannerInfo *root)
  */
 static Node *
 pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node *jtnode,
-                                                                 Relids 
*relids)
+                                                                 Relids 
*relids, JoinExpr *lowest_outer_join)
 {
        if (jtnode == NULL)
        {
@@ -343,7 +345,8 @@ pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node 
*jtnode,
 
                        newchild = pull_up_sublinks_jointree_recurse(root,
                                                                                
                                 lfirst(l),
-                                                                               
                                 &childrelids);
+                                                                               
                                 &childrelids,
+                                                                               
                                 lowest_outer_join);
                        newfromlist = lappend(newfromlist, newchild);
                        frelids = bms_join(frelids, childrelids);
                }
@@ -354,7 +357,8 @@ pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node 
*jtnode,
                /* Now process qual --- all children are available for use */
                newf->quals = pull_up_sublinks_qual_recurse(root, f->quals,
                                                                                
                        &jtlink, frelids,
-                                                                               
                        NULL, NULL);
+                                                                               
                        NULL, NULL,
+                                                                               
                        lowest_outer_join);
 
                /*
                 * Note that the result will be either newf, or a stack of 
JoinExprs
@@ -385,9 +389,11 @@ pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node 
*jtnode,
 
                /* Recurse to process children and collect their relids */
                j->larg = pull_up_sublinks_jointree_recurse(root, j->larg,
-                                                                               
                        &leftrelids);
+                                                                               
                        &leftrelids,
+                                                                               
                        lowest_outer_join);
                j->rarg = pull_up_sublinks_jointree_recurse(root, j->rarg,
-                                                                               
                        &rightrelids);
+                                                                               
                        &rightrelids,
+                                                                               
                        lowest_outer_join);
 
                /*
                 * Now process qual, showing appropriate child relids as 
available,
@@ -408,13 +414,14 @@ pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node 
*jtnode,
                                                                                
                                 &jtlink,
                                                                                
                                 bms_union(leftrelids,
                                                                                
                                                   rightrelids),
-                                                                               
                                 NULL, NULL);
+                                                                               
                                 NULL, NULL,
+                                                                               
                                 lowest_outer_join);
                                break;
                        case JOIN_LEFT:
                                j->quals = pull_up_sublinks_qual_recurse(root, 
j->quals,
                                                                                
                                 &j->rarg,
                                                                                
                                 rightrelids,
-                                                                               
                                 NULL, NULL);
+                                                                               
                                 NULL, NULL, j);
                                break;
                        case JOIN_FULL:
                                /* can't do anything with full-join quals */
@@ -423,7 +430,7 @@ pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node 
*jtnode,
                                j->quals = pull_up_sublinks_qual_recurse(root, 
j->quals,
                                                                                
                                 &j->larg,
                                                                                
                                 leftrelids,
-                                                                               
                                 NULL, NULL);
+                                                                               
                                 NULL, NULL, j);
                                break;
                        default:
                                elog(ERROR, "unrecognized join type: %d",
@@ -468,7 +475,8 @@ pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node 
*jtnode,
 static Node *
 pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
                                                          Node **jtlink1, 
Relids available_rels1,
-                                                         Node **jtlink2, 
Relids available_rels2)
+                                                         Node **jtlink2, 
Relids available_rels2,
+                                                         JoinExpr 
*lowest_outer_join)
 {
        if (node == NULL)
                return NULL;
@@ -482,7 +490,8 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
                if (sublink->subLinkType == ANY_SUBLINK)
                {
                        if ((j = convert_ANY_sublink_to_join(root, sublink,
-                                                                               
                 available_rels1)) != NULL)
+                                                                               
                 available_rels1,
+                                                                               
                 lowest_outer_join)) != NULL)
                        {
                                /* Yes; insert the new join node into the join 
tree */
                                j->larg = *jtlink1;
@@ -490,7 +499,7 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
                                /* Recursively process pulled-up jointree nodes 
*/
                                j->rarg = 
pull_up_sublinks_jointree_recurse(root,
                                                                                
                                        j->rarg,
-                                                                               
                                        &child_rels);
+                                                                               
                                        &child_rels, j);
 
                                /*
                                 * Now recursively process the pulled-up quals. 
 Any inserted
@@ -502,13 +511,15 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node 
*node,
                                                                                
                                 &j->larg,
                                                                                
                                 available_rels1,
                                                                                
                                 &j->rarg,
-                                                                               
                                 child_rels);
+                                                                               
                                 child_rels,
+                                                                               
                                 j);
                                /* Return NULL representing constant TRUE */
                                return NULL;
                        }
                        if (available_rels2 != NULL &&
                                (j = convert_ANY_sublink_to_join(root, sublink,
-                                                                               
                 available_rels2)) != NULL)
+                                                                               
                 available_rels2,
+                                                                               
                 lowest_outer_join)) != NULL)
                        {
                                /* Yes; insert the new join node into the join 
tree */
                                j->larg = *jtlink2;
@@ -516,7 +527,7 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
                                /* Recursively process pulled-up jointree nodes 
*/
                                j->rarg = 
pull_up_sublinks_jointree_recurse(root,
                                                                                
                                        j->rarg,
-                                                                               
                                        &child_rels);
+                                                                               
                                        &child_rels, j);
 
                                /*
                                 * Now recursively process the pulled-up quals. 
 Any inserted
@@ -528,7 +539,8 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
                                                                                
                                 &j->larg,
                                                                                
                                 available_rels2,
                                                                                
                                 &j->rarg,
-                                                                               
                                 child_rels);
+                                                                               
                                 child_rels,
+                                                                               
                                 j);
                                /* Return NULL representing constant TRUE */
                                return NULL;
                        }
@@ -544,7 +556,7 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
                                /* Recursively process pulled-up jointree nodes 
*/
                                j->rarg = 
pull_up_sublinks_jointree_recurse(root,
                                                                                
                                        j->rarg,
-                                                                               
                                        &child_rels);
+                                                                               
                                        &child_rels, j);
 
                                /*
                                 * Now recursively process the pulled-up quals. 
 Any inserted
@@ -556,7 +568,8 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
                                                                                
                                 &j->larg,
                                                                                
                                 available_rels1,
                                                                                
                                 &j->rarg,
-                                                                               
                                 child_rels);
+                                                                               
                                 child_rels,
+                                                                               
                                 j);
                                /* Return NULL representing constant TRUE */
                                return NULL;
                        }
@@ -570,7 +583,7 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
                                /* Recursively process pulled-up jointree nodes 
*/
                                j->rarg = 
pull_up_sublinks_jointree_recurse(root,
                                                                                
                                        j->rarg,
-                                                                               
                                        &child_rels);
+                                                                               
                                        &child_rels, j);
 
                                /*
                                 * Now recursively process the pulled-up quals. 
 Any inserted
@@ -582,7 +595,8 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
                                                                                
                                 &j->larg,
                                                                                
                                 available_rels2,
                                                                                
                                 &j->rarg,
-                                                                               
                                 child_rels);
+                                                                               
                                 child_rels,
+                                                                               
                                 j);
                                /* Return NULL representing constant TRUE */
                                return NULL;
                        }
@@ -610,7 +624,7 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
                                        /* Recursively process pulled-up 
jointree nodes */
                                        j->rarg = 
pull_up_sublinks_jointree_recurse(root,
                                                                                
                                                j->rarg,
-                                                                               
                                                &child_rels);
+                                                                               
                                                &child_rels, j);
 
                                        /*
                                         * Now recursively process the 
pulled-up quals.  Because
@@ -622,7 +636,8 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
                                                                                
                                         j->quals,
                                                                                
                                         &j->rarg,
                                                                                
                                         child_rels,
-                                                                               
                                         NULL, NULL);
+                                                                               
                                         NULL, NULL,
+                                                                               
                                         j);
                                        /* Return NULL representing constant 
TRUE */
                                        return NULL;
                                }
@@ -636,7 +651,7 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
                                        /* Recursively process pulled-up 
jointree nodes */
                                        j->rarg = 
pull_up_sublinks_jointree_recurse(root,
                                                                                
                                                j->rarg,
-                                                                               
                                                &child_rels);
+                                                                               
                                                &child_rels, j);
 
                                        /*
                                         * Now recursively process the 
pulled-up quals.  Because
@@ -648,7 +663,8 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
                                                                                
                                         j->quals,
                                                                                
                                         &j->rarg,
                                                                                
                                         child_rels,
-                                                                               
                                         NULL, NULL);
+                                                                               
                                         NULL, NULL,
+                                                                               
                                         j);
                                        /* Return NULL representing constant 
TRUE */
                                        return NULL;
                                }
@@ -673,7 +689,8 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
                                                                                
                          jtlink1,
                                                                                
                          available_rels1,
                                                                                
                          jtlink2,
-                                                                               
                          available_rels2);
+                                                                               
                          available_rels2,
+                                                                               
                          lowest_outer_join);
                        if (newclause)
                                newclauses = lappend(newclauses, newclause);
                }
diff --git a/src/backend/optimizer/util/tlist.c 
b/src/backend/optimizer/util/tlist.c
index 784a1af82d..5b7aee121f 100644
--- a/src/backend/optimizer/util/tlist.c
+++ b/src/backend/optimizer/util/tlist.c
@@ -98,7 +98,7 @@ tlist_member(Expr *node, List *targetlist)
  * This is needed in some cases where we can't be sure of an exact typmod
  * match.  For safety, though, we insist on vartype match.
  */
-static TargetEntry *
+TargetEntry *
 tlist_member_match_var(Var *var, List *targetlist)
 {
        ListCell   *temp;
diff --git a/src/backend/optimizer/util/var.c b/src/backend/optimizer/util/var.c
index 7db86c39ef..54441e692b 100644
--- a/src/backend/optimizer/util/var.c
+++ b/src/backend/optimizer/util/var.c
@@ -461,6 +461,14 @@ contain_vars_of_level_walker(Node *node, int *sublevels_up)
                        return true;            /* abort the tree traversal and 
return true */
                /* else fall through to check the contained expr */
        }
+       if (IsA(node, RangeTblEntry))
+       {
+               RangeTblEntry *rte = (RangeTblEntry *) node;
+
+               /* Someone can call the routine on a field of Query struct */
+               return range_table_entry_walker(rte, 
contain_vars_of_level_walker,
+                                                                               
(void *) sublevels_up, 0);
+       }
        if (IsA(node, Query))
        {
                /* Recurse into subselects */
diff --git a/src/backend/utils/misc/guc_tables.c 
b/src/backend/utils/misc/guc_tables.c
index 05ab087934..55791b474b 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -765,6 +765,16 @@ StaticAssertDecl(lengthof(config_type_names) == (PGC_ENUM 
+ 1),
 
 struct config_bool ConfigureNamesBool[] =
 {
+       {
+               {"optimize_correlated_subqueries", PGC_USERSET, 
QUERY_TUNING_METHOD,
+                       gettext_noop("optimize_correlated_subqueries."),
+                       NULL,
+                       GUC_EXPLAIN | GUC_NOT_IN_SAMPLE
+               },
+               &optimize_correlated_subqueries,
+               true,
+               NULL, NULL, NULL
+       },
        {
                {"enable_seqscan", PGC_USERSET, QUERY_TUNING_METHOD,
                        gettext_noop("Enables the planner's use of 
sequential-scan plans."),
diff --git a/src/include/optimizer/optimizer.h 
b/src/include/optimizer/optimizer.h
index 409005bae9..cdf3fdce1a 100644
--- a/src/include/optimizer/optimizer.h
+++ b/src/include/optimizer/optimizer.h
@@ -88,6 +88,7 @@ extern PGDLLIMPORT double parallel_tuple_cost;
 extern PGDLLIMPORT double parallel_setup_cost;
 extern PGDLLIMPORT double recursive_worktable_factor;
 extern PGDLLIMPORT int effective_cache_size;
+extern PGDLLIMPORT bool optimize_correlated_subqueries;
 
 extern double clamp_row_est(double nrows);
 extern long clamp_cardinality_to_long(Cardinality x);
diff --git a/src/include/optimizer/subselect.h 
b/src/include/optimizer/subselect.h
index 456d3076e0..4e126e45ee 100644
--- a/src/include/optimizer/subselect.h
+++ b/src/include/optimizer/subselect.h
@@ -19,7 +19,8 @@
 extern void SS_process_ctes(PlannerInfo *root);
 extern JoinExpr *convert_ANY_sublink_to_join(PlannerInfo *root,
                                                                                
         SubLink *sublink,
-                                                                               
         Relids available_rels);
+                                                                               
         Relids available_rels,
+                                                                               
         JoinExpr *lowest_outer_join);
 extern JoinExpr *convert_EXISTS_sublink_to_join(PlannerInfo *root,
                                                                                
                SubLink *sublink,
                                                                                
                bool under_not,
diff --git a/src/include/optimizer/tlist.h b/src/include/optimizer/tlist.h
index 04668ba1c0..7627e7f679 100644
--- a/src/include/optimizer/tlist.h
+++ b/src/include/optimizer/tlist.h
@@ -18,6 +18,7 @@
 
 
 extern TargetEntry *tlist_member(Expr *node, List *targetlist);
+extern TargetEntry *tlist_member_match_var(Var *var, List *targetlist);
 
 extern List *add_to_flat_tlist(List *tlist, List *exprs);
 
diff --git a/src/test/regress/expected/prepare.out 
b/src/test/regress/expected/prepare.out
index 5815e17b39..749b3faf64 100644
--- a/src/test/regress/expected/prepare.out
+++ b/src/test/regress/expected/prepare.out
@@ -184,6 +184,24 @@ SELECT name, statement, parameter_types, result_types FROM 
pg_prepared_statement
       |     UPDATE tenk1 SET stringu1 = $2 WHERE unique1 = $1;           |     
                                               | 
 (6 rows)
 
+-- The optimization on unnesting of correlated subqueries should work
+PREPARE q9(name,int) AS
+    SELECT * FROM tenk1 upper WHERE unique1 IN (
+        SELECT sub.unique2 FROM tenk1 sub
+        WHERE sub.stringu1 = $1 AND sub.unique1 = upper.unique2 + $2);
+EXPLAIN (COSTS OFF) EXECUTE q9('abc',2);
+                     QUERY PLAN                      
+-----------------------------------------------------
+ Nested Loop
+   ->  HashAggregate
+         Group Key: sub.unique2, sub.unique1
+         ->  Seq Scan on tenk1 sub
+               Filter: (stringu1 = 'abc'::name)
+   ->  Index Scan using tenk1_unique1 on tenk1 upper
+         Index Cond: (unique1 = sub.unique2)
+         Filter: (sub.unique1 = (unique2 + 2))
+(8 rows)
+
 -- test DEALLOCATE ALL;
 DEALLOCATE ALL;
 SELECT name, statement, parameter_types FROM pg_prepared_statements
diff --git a/src/test/regress/expected/subselect.out 
b/src/test/regress/expected/subselect.out
index 63d26d44fc..1523770984 100644
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -164,6 +164,35 @@ SELECT f1 AS "Correlated Field", f2 AS "Second Field"
                 3 |            3
 (6 rows)
 
+EXPLAIN (COSTS OFF) SELECT f1 AS "Correlated Field", f3 AS "Second Field"
+  FROM SUBSELECT_TBL upper
+  WHERE f1 IN
+    (SELECT f2 FROM SUBSELECT_TBL WHERE CAST(upper.f2 AS float) = f3);
+                                             QUERY PLAN                        
                     
+----------------------------------------------------------------------------------------------------
+ Hash Join
+   Hash Cond: ((upper.f1 = subselect_tbl.f2) AND ((upper.f2)::double precision 
= subselect_tbl.f3))
+   ->  Seq Scan on subselect_tbl upper
+   ->  Hash
+         ->  HashAggregate
+               Group Key: subselect_tbl.f2, subselect_tbl.f3
+               ->  Seq Scan on subselect_tbl
+(7 rows)
+
+-- Still doesn't work for NOT IN
+EXPLAIN (COSTS OFF) SELECT f1 AS "Correlated Field", f3 AS "Second Field"
+  FROM SUBSELECT_TBL upper
+  WHERE f1 NOT IN
+    (SELECT f2 FROM SUBSELECT_TBL WHERE CAST(upper.f2 AS float) = f3);
+                      QUERY PLAN                       
+-------------------------------------------------------
+ Seq Scan on subselect_tbl upper
+   Filter: (NOT (SubPlan 1))
+   SubPlan 1
+     ->  Seq Scan on subselect_tbl
+           Filter: ((upper.f2)::double precision = f3)
+(5 rows)
+
 SELECT f1 AS "Correlated Field", f3 AS "Second Field"
   FROM SUBSELECT_TBL upper
   WHERE f1 IN
@@ -177,6 +206,415 @@ SELECT f1 AS "Correlated Field", f3 AS "Second Field"
                 3 |            3
 (5 rows)
 
+-- Constraints, imposed by LATERAL references, prohibit flattening of 
underlying
+-- Sublink.
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM SUBSELECT_TBL a
+  WHERE EXISTS (SELECT f2 FROM SUBSELECT_TBL b WHERE f3 IN (
+      SELECT f3 FROM SUBSELECT_TBL c WHERE c.f1 = a.f2));
+                  QUERY PLAN                   
+-----------------------------------------------
+ Aggregate
+   ->  Nested Loop Semi Join
+         Join Filter: (SubPlan 1)
+         ->  Seq Scan on subselect_tbl a
+         ->  Materialize
+               ->  Seq Scan on subselect_tbl b
+         SubPlan 1
+           ->  Seq Scan on subselect_tbl c
+                 Filter: (f1 = a.f2)
+(9 rows)
+
+SELECT count(*) FROM SUBSELECT_TBL a
+  WHERE EXISTS (SELECT f2 FROM SUBSELECT_TBL b WHERE f3 IN (
+      SELECT f3 FROM SUBSELECT_TBL c WHERE c.f1 = a.f2));
+ count 
+-------
+     5
+(1 row)
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM SUBSELECT_TBL a
+  WHERE NOT EXISTS (SELECT f2 FROM SUBSELECT_TBL b WHERE f3 IN (
+      SELECT f3 FROM SUBSELECT_TBL c WHERE c.f1 = a.f2));
+                  QUERY PLAN                   
+-----------------------------------------------
+ Aggregate
+   ->  Nested Loop Anti Join
+         Join Filter: (SubPlan 1)
+         ->  Seq Scan on subselect_tbl a
+         ->  Materialize
+               ->  Seq Scan on subselect_tbl b
+         SubPlan 1
+           ->  Seq Scan on subselect_tbl c
+                 Filter: (f1 = a.f2)
+(9 rows)
+
+-- Prohibit to unnest subquery - quals contain lateral references to rels
+-- outside a higher outer join.
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM SUBSELECT_TBL a LEFT JOIN SUBSELECT_TBL b ON b.f1 IN (
+  SELECT c.f2 FROM SUBSELECT_TBL c WHERE c.f3 = a.f2);
+                       QUERY PLAN                        
+---------------------------------------------------------
+ Aggregate
+   ->  Nested Loop Left Join
+         Join Filter: (SubPlan 1)
+         ->  Seq Scan on subselect_tbl a
+         ->  Materialize
+               ->  Seq Scan on subselect_tbl b
+         SubPlan 1
+           ->  Seq Scan on subselect_tbl c
+                 Filter: (f3 = (a.f2)::double precision)
+(9 rows)
+
+SELECT count(*) FROM SUBSELECT_TBL a LEFT JOIN SUBSELECT_TBL b ON b.f1 IN (
+  SELECT c.f2 FROM SUBSELECT_TBL c WHERE c.f3 = a.f2);
+ count 
+-------
+    18
+(1 row)
+
+EXPLAIN (COSTS OFF)
+  SELECT * FROM subselect_tbl a
+  WHERE a.f1 IN
+    (SELECT b.f2 FROM subselect_tbl b WHERE b.f2 = a.f1)
+; -- Optimizer removes excess clause
+                  QUERY PLAN                   
+-----------------------------------------------
+ Hash Join
+   Hash Cond: (a.f1 = b.f2)
+   ->  Seq Scan on subselect_tbl a
+   ->  Hash
+         ->  HashAggregate
+               Group Key: b.f2, b.f2
+               ->  Seq Scan on subselect_tbl b
+(7 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM subselect_tbl a
+WHERE a.f1 IN (SELECT b.f2*b.f1/b.f3+2 FROM subselect_tbl b WHERE b.f3 = a.f2)
+; -- a bit more complex targetlist expression shouldn't cut off the 
optimization
+                                                                       QUERY 
PLAN                                                                       
+--------------------------------------------------------------------------------------------------------------------------------------------------------
+ Hash Join
+   Hash Cond: (((a.f1)::double precision = ((((b.f2 * b.f1))::double precision 
/ b.f3) + '2'::double precision)) AND ((a.f2)::double precision = b.f3))
+   ->  Seq Scan on subselect_tbl a
+   ->  Hash
+         ->  HashAggregate
+               Group Key: ((((b.f2 * b.f1))::double precision / b.f3) + 
'2'::double precision), b.f3
+               ->  Seq Scan on subselect_tbl b
+(7 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM subselect_tbl a
+WHERE (a.f1,a.f3) IN (SELECT b.f2,b.f1 FROM subselect_tbl b WHERE b.f3 = a.f2)
+; -- Two variables in a target list
+                                                QUERY PLAN                     
                           
+----------------------------------------------------------------------------------------------------------
+ Hash Join
+   Hash Cond: ((a.f1 = b.f2) AND (a.f3 = (b.f1)::double precision) AND 
((a.f2)::double precision = b.f3))
+   ->  Seq Scan on subselect_tbl a
+   ->  Hash
+         ->  HashAggregate
+               Group Key: b.f2, (b.f1)::double precision, b.f3
+               ->  Seq Scan on subselect_tbl b
+(7 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM subselect_tbl a
+WHERE (a.f1,a.f3) IN (SELECT b.f2,b.f1*2 FROM subselect_tbl b WHERE b.f3 = 
a.f2)
+; -- Expression as an element of composite type shouldn't cut off the 
optimization
+                                                   QUERY PLAN                  
                                 
+----------------------------------------------------------------------------------------------------------------
+ Hash Join
+   Hash Cond: ((a.f1 = b.f2) AND (a.f3 = ((b.f1 * 2))::double precision) AND 
((a.f2)::double precision = b.f3))
+   ->  Seq Scan on subselect_tbl a
+   ->  Hash
+         ->  HashAggregate
+               Group Key: b.f2, ((b.f1 * 2))::double precision, b.f3
+               ->  Seq Scan on subselect_tbl b
+(7 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM subselect_tbl a
+WHERE a.f1 IN (SELECT b.f2 FROM subselect_tbl b WHERE b.f3 = a.f2 AND a.f1 = 
b.f3 AND b.f3 <> 12)
+; -- Two expressions with correlated variables
+                               QUERY PLAN                                
+-------------------------------------------------------------------------
+ Hash Join
+   Hash Cond: ((b.f2 = a.f1) AND (b.f3 = (a.f2)::double precision))
+   ->  HashAggregate
+         Group Key: b.f2, b.f3, b.f3
+         ->  Seq Scan on subselect_tbl b
+               Filter: (f3 <> '12'::double precision)
+   ->  Hash
+         ->  Seq Scan on subselect_tbl a
+               Filter: ((f2)::double precision = (f1)::double precision)
+(9 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM subselect_tbl a
+WHERE a.f1 IN (SELECT b.f2 FROM subselect_tbl b WHERE b.f3 = a.f2 AND a.f2 = 
b.f3 AND b.f1 < 12)
+; -- Two expressions with correlated variables relates on one upper variable.
+                             QUERY PLAN                             
+--------------------------------------------------------------------
+ Hash Join
+   Hash Cond: ((a.f1 = b.f2) AND ((a.f2)::double precision = b.f3))
+   ->  Seq Scan on subselect_tbl a
+   ->  Hash
+         ->  HashAggregate
+               Group Key: b.f2, b.f3, b.f3
+               ->  Seq Scan on subselect_tbl b
+                     Filter: (f1 < 12)
+(8 rows)
+
+EXPLAIN (COSTS OFF)
+  SELECT * FROM subselect_tbl a
+  WHERE a.f1 IN (
+    SELECT b.f2 FROM subselect_tbl b WHERE b.f2 = a.f2 AND b.f2 < 12
+    GROUP BY (b.f2)
+  )
+; -- Pull clauses without unnesting the query. XXX: It reduces performance in 
most use cases, doesn't it?
+               QUERY PLAN                
+-----------------------------------------
+ Hash Join
+   Hash Cond: (b.f2 = a.f1)
+   ->  HashAggregate
+         Group Key: b.f2
+         ->  Seq Scan on subselect_tbl b
+               Filter: (f2 < 12)
+   ->  Hash
+         ->  Seq Scan on subselect_tbl a
+               Filter: (f1 = f2)
+(9 rows)
+
+EXPLAIN (COSTS OFF)
+  SELECT * FROM subselect_tbl a
+  WHERE a.f1 IN (
+    SELECT b.f2 FROM (
+      SELECT x AS f1, x+1 AS f2, x+2 AS f3 FROM generate_series(1,10) x
+    ) b WHERE b.f2 = a.f2 AND b.f1 BETWEEN 12 AND 14
+      UNION ALL
+    SELECT c.f1 FROM subselect_tbl c
+    WHERE c.f2 = a.f2 AND c.f1 BETWEEN 12 AND 14
+  )
+; -- Disallow flattening of union all
+                               QUERY PLAN                               
+------------------------------------------------------------------------
+ Seq Scan on subselect_tbl a
+   Filter: (SubPlan 1)
+   SubPlan 1
+     ->  Append
+           ->  Function Scan on generate_series x
+                 Filter: ((x >= 12) AND (x <= 14) AND ((x + 1) = a.f2))
+           ->  Seq Scan on subselect_tbl c
+                 Filter: ((f1 >= 12) AND (f1 <= 14) AND (f2 = a.f2))
+(8 rows)
+
+EXPLAIN (COSTS OFF)
+  SELECT * FROM subselect_tbl a
+  WHERE a.f1 IN (
+    SELECT b.f1 FROM subselect_tbl b JOIN subselect_tbl c ON (b.f1 = c.f2)
+    WHERE c.f2 = a.f2 AND c.f1 BETWEEN 12 AND 14
+  )
+; -- XXX: Could we flatten such subquery?
+                             QUERY PLAN                              
+---------------------------------------------------------------------
+ Seq Scan on subselect_tbl a
+   Filter: (SubPlan 1)
+   SubPlan 1
+     ->  Nested Loop
+           ->  Seq Scan on subselect_tbl c
+                 Filter: ((f1 >= 12) AND (f1 <= 14) AND (f2 = a.f2))
+           ->  Seq Scan on subselect_tbl b
+                 Filter: (f1 = a.f2)
+(8 rows)
+
+EXPLAIN (COSTS OFF)
+  SELECT * FROM subselect_tbl a
+  WHERE a.f1 IN (
+    SELECT b.f1 FROM subselect_tbl b, subselect_tbl c
+    WHERE b.f1 = c.f2 AND c.f2 = a.f2 AND c.f1 IS NOT NULL
+  )
+; -- TODO: Could we flatten such subquery?
+                            QUERY PLAN                            
+------------------------------------------------------------------
+ Seq Scan on subselect_tbl a
+   Filter: (SubPlan 1)
+   SubPlan 1
+     ->  Nested Loop
+           ->  Seq Scan on subselect_tbl b
+                 Filter: (f1 = a.f2)
+           ->  Materialize
+                 ->  Seq Scan on subselect_tbl c
+                       Filter: ((f1 IS NOT NULL) AND (f2 = a.f2))
+(9 rows)
+
+EXPLAIN (COSTS OFF)
+  SELECT * FROM subselect_tbl a
+  WHERE (a.f1,f2) IN (
+    SELECT b.f2, avg(f3) FROM subselect_tbl b WHERE b.f2 = a.f2 AND b.f2 < 12
+    GROUP BY (b.f2)
+  )
+; -- Doesn't support unnesting with aggregate functions
+                     QUERY PLAN                      
+-----------------------------------------------------
+ Seq Scan on subselect_tbl a
+   Filter: (SubPlan 1)
+   SubPlan 1
+     ->  GroupAggregate
+           Group Key: b.f2
+           ->  Seq Scan on subselect_tbl b
+                 Filter: ((f2 < 12) AND (f2 = a.f2))
+(7 rows)
+
+EXPLAIN (COSTS OFF)
+  SELECT * FROM subselect_tbl a
+  WHERE a.f1 IN (
+    WITH cte AS (
+      SELECT * FROM subselect_tbl c WHERE f1 < 42 AND f2 = a.f1
+    )
+    SELECT b.f2 FROM cte b WHERE b.f2 = a.f2 AND b.f2 < 12
+  )
+; -- Give up optimization if CTE in subquery contains links to upper relation.
+                            QUERY PLAN                             
+-------------------------------------------------------------------
+ Seq Scan on subselect_tbl a
+   Filter: (SubPlan 1)
+   SubPlan 1
+     ->  Result
+           One-Time Filter: (a.f1 = a.f2)
+           ->  Seq Scan on subselect_tbl c
+                 Filter: ((f1 < 42) AND (f2 < 12) AND (f2 = a.f2))
+(7 rows)
+
+EXPLAIN (COSTS OFF)
+  SELECT * FROM subselect_tbl a
+  WHERE a.f1 IN (
+    WITH cte AS (
+      SELECT * FROM subselect_tbl c WHERE f1 < 42
+    )
+    SELECT b.f2 FROM cte b WHERE b.f2 = a.f2 AND b.f2 < 12
+  )
+; -- Correlated subquery with trivial CTE can be pulled up
+                   QUERY PLAN                    
+-------------------------------------------------
+ Hash Semi Join
+   Hash Cond: (a.f1 = c.f2)
+   ->  Seq Scan on subselect_tbl a
+         Filter: (f1 = f2)
+   ->  Hash
+         ->  Seq Scan on subselect_tbl c
+               Filter: ((f1 < 42) AND (f2 < 12))
+(7 rows)
+
+EXPLAIN (COSTS OFF)
+  SELECT * FROM subselect_tbl a
+  WHERE (a.f1,a.f3) IN (
+    SELECT b.f2, avg(b.f3) OVER (PARTITION BY b.f2)
+    FROM subselect_tbl b WHERE b.f2 = a.f2 AND b.f2 < 12
+  )
+; -- Doesn't support unnesting with window functions in target list
+                     QUERY PLAN                      
+-----------------------------------------------------
+ Seq Scan on subselect_tbl a
+   Filter: (SubPlan 1)
+   SubPlan 1
+     ->  WindowAgg
+           ->  Seq Scan on subselect_tbl b
+                 Filter: ((f2 < 12) AND (f2 = a.f2))
+(6 rows)
+
+-- A having qual, group clause and so on, with links to upper relation variable
+-- cut off the optimization because another case we must rewrite the subquery
+-- as a lateral TargetEntry and arrange these links.
+-- But now, machinery of convert_ANY_sublink_to_join() isn't prepared for such
+-- complex work and it would induce additional complex code.
+EXPLAIN (COSTS OFF)
+  SELECT * FROM subselect_tbl a
+  WHERE a.f1 IN (
+    SELECT b.f2
+    FROM subselect_tbl b WHERE b.f2 = a.f2 AND b.f2 < 12
+    GROUP BY (b.f2) HAVING b.f2 > a.f3
+  )
+;
+                                       QUERY PLAN                              
          
+-----------------------------------------------------------------------------------------
+ Seq Scan on subselect_tbl a
+   Filter: (SubPlan 1)
+   SubPlan 1
+     ->  Group
+           Group Key: b.f2
+           ->  Seq Scan on subselect_tbl b
+                 Filter: ((f2 < 12) AND (f2 = a.f2) AND ((f2)::double 
precision > a.f3))
+(7 rows)
+
+EXPLAIN (COSTS OFF)
+  SELECT * FROM subselect_tbl a
+  WHERE a.f1 IN (
+    SELECT b.f2 FROM (
+      SELECT x AS f1, x+1 AS f2, x+2 AS f3 FROM generate_series(1,a.f1) x
+    ) b WHERE b.f2 = a.f2 AND b.f1 < 12
+  )
+; -- Don't allow links to upper query in FROM section of subquery
+                    QUERY PLAN                     
+---------------------------------------------------
+ Seq Scan on subselect_tbl a
+   Filter: (SubPlan 1)
+   SubPlan 1
+     ->  Function Scan on generate_series x
+           Filter: ((x < 12) AND ((x + 1) = a.f2))
+(5 rows)
+
+EXPLAIN (COSTS OFF)
+  SELECT * FROM subselect_tbl a
+  WHERE a.f1 IN (
+    SELECT a.f1 FROM subselect_tbl b WHERE b.f2 = a.f2 AND b.f2 < 12
+    GROUP BY (a.f1)
+  )
+; -- GROUP BY contains link to upper relation
+                     QUERY PLAN                      
+-----------------------------------------------------
+ Seq Scan on subselect_tbl a
+   Filter: (SubPlan 1)
+   SubPlan 1
+     ->  Group
+           Group Key: a.f1
+           ->  Seq Scan on subselect_tbl b
+                 Filter: ((f2 < 12) AND (f2 = a.f2))
+(7 rows)
+
+-- Flatten subquery with not-correlated clauses. The same result set returned
+EXPLAIN (COSTS OFF) SELECT f1 AS "Correlated Field", f3 AS "Second Field"
+  FROM SUBSELECT_TBL upper
+  WHERE f1 IN
+    (SELECT f2 FROM SUBSELECT_TBL WHERE CAST(upper.f2 AS float) = f3 AND f2 <> 
42);
+                                             QUERY PLAN                        
                     
+----------------------------------------------------------------------------------------------------
+ Hash Join
+   Hash Cond: ((upper.f1 = subselect_tbl.f2) AND ((upper.f2)::double precision 
= subselect_tbl.f3))
+   ->  Seq Scan on subselect_tbl upper
+   ->  Hash
+         ->  HashAggregate
+               Group Key: subselect_tbl.f2, subselect_tbl.f3
+               ->  Seq Scan on subselect_tbl
+                     Filter: (f2 <> 42)
+(8 rows)
+
+SELECT f1 AS "Correlated Field", f3 AS "Second Field"
+  FROM SUBSELECT_TBL upper
+  WHERE f1 IN
+    (SELECT f2 FROM SUBSELECT_TBL WHERE CAST(upper.f2 AS float) = f3 AND f2 <> 
42);
+ Correlated Field | Second Field 
+------------------+--------------
+                2 |            4
+                3 |            5
+                1 |            1
+                2 |            2
+                3 |            3
+(5 rows)
+
 SELECT f1 AS "Correlated Field", f3 AS "Second Field"
   FROM SUBSELECT_TBL upper
   WHERE f3 IN (SELECT upper.f1 + f2 FROM SUBSELECT_TBL
diff --git a/src/test/regress/sql/prepare.sql b/src/test/regress/sql/prepare.sql
index c6098dc95c..8a8164ee99 100644
--- a/src/test/regress/sql/prepare.sql
+++ b/src/test/regress/sql/prepare.sql
@@ -78,6 +78,13 @@ PREPARE q8 AS
 SELECT name, statement, parameter_types, result_types FROM 
pg_prepared_statements
     ORDER BY name;
 
+-- The optimization on unnesting of correlated subqueries should work
+PREPARE q9(name,int) AS
+    SELECT * FROM tenk1 upper WHERE unique1 IN (
+        SELECT sub.unique2 FROM tenk1 sub
+        WHERE sub.stringu1 = $1 AND sub.unique1 = upper.unique2 + $2);
+EXPLAIN (COSTS OFF) EXECUTE q9('abc',2);
+
 -- test DEALLOCATE ALL;
 DEALLOCATE ALL;
 SELECT name, statement, parameter_types FROM pg_prepared_statements
diff --git a/src/test/regress/sql/subselect.sql 
b/src/test/regress/sql/subselect.sql
index 40276708c9..40ed61d508 100644
--- a/src/test/regress/sql/subselect.sql
+++ b/src/test/regress/sql/subselect.sql
@@ -67,11 +67,173 @@ SELECT f1 AS "Correlated Field", f2 AS "Second Field"
   FROM SUBSELECT_TBL upper
   WHERE f1 IN (SELECT f2 FROM SUBSELECT_TBL WHERE f1 = upper.f1);
 
+EXPLAIN (COSTS OFF) SELECT f1 AS "Correlated Field", f3 AS "Second Field"
+  FROM SUBSELECT_TBL upper
+  WHERE f1 IN
+    (SELECT f2 FROM SUBSELECT_TBL WHERE CAST(upper.f2 AS float) = f3);
+-- Still doesn't work for NOT IN
+EXPLAIN (COSTS OFF) SELECT f1 AS "Correlated Field", f3 AS "Second Field"
+  FROM SUBSELECT_TBL upper
+  WHERE f1 NOT IN
+    (SELECT f2 FROM SUBSELECT_TBL WHERE CAST(upper.f2 AS float) = f3);
+
 SELECT f1 AS "Correlated Field", f3 AS "Second Field"
   FROM SUBSELECT_TBL upper
   WHERE f1 IN
     (SELECT f2 FROM SUBSELECT_TBL WHERE CAST(upper.f2 AS float) = f3);
 
+-- Constraints, imposed by LATERAL references, prohibit flattening of 
underlying
+-- Sublink.
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM SUBSELECT_TBL a
+  WHERE EXISTS (SELECT f2 FROM SUBSELECT_TBL b WHERE f3 IN (
+      SELECT f3 FROM SUBSELECT_TBL c WHERE c.f1 = a.f2));
+SELECT count(*) FROM SUBSELECT_TBL a
+  WHERE EXISTS (SELECT f2 FROM SUBSELECT_TBL b WHERE f3 IN (
+      SELECT f3 FROM SUBSELECT_TBL c WHERE c.f1 = a.f2));
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM SUBSELECT_TBL a
+  WHERE NOT EXISTS (SELECT f2 FROM SUBSELECT_TBL b WHERE f3 IN (
+      SELECT f3 FROM SUBSELECT_TBL c WHERE c.f1 = a.f2));
+
+-- Prohibit to unnest subquery - quals contain lateral references to rels
+-- outside a higher outer join.
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM SUBSELECT_TBL a LEFT JOIN SUBSELECT_TBL b ON b.f1 IN (
+  SELECT c.f2 FROM SUBSELECT_TBL c WHERE c.f3 = a.f2);
+SELECT count(*) FROM SUBSELECT_TBL a LEFT JOIN SUBSELECT_TBL b ON b.f1 IN (
+  SELECT c.f2 FROM SUBSELECT_TBL c WHERE c.f3 = a.f2);
+
+EXPLAIN (COSTS OFF)
+  SELECT * FROM subselect_tbl a
+  WHERE a.f1 IN
+    (SELECT b.f2 FROM subselect_tbl b WHERE b.f2 = a.f1)
+; -- Optimizer removes excess clause
+EXPLAIN (COSTS OFF)
+SELECT * FROM subselect_tbl a
+WHERE a.f1 IN (SELECT b.f2*b.f1/b.f3+2 FROM subselect_tbl b WHERE b.f3 = a.f2)
+; -- a bit more complex targetlist expression shouldn't cut off the 
optimization
+EXPLAIN (COSTS OFF)
+SELECT * FROM subselect_tbl a
+WHERE (a.f1,a.f3) IN (SELECT b.f2,b.f1 FROM subselect_tbl b WHERE b.f3 = a.f2)
+; -- Two variables in a target list
+EXPLAIN (COSTS OFF)
+SELECT * FROM subselect_tbl a
+WHERE (a.f1,a.f3) IN (SELECT b.f2,b.f1*2 FROM subselect_tbl b WHERE b.f3 = 
a.f2)
+; -- Expression as an element of composite type shouldn't cut off the 
optimization
+EXPLAIN (COSTS OFF)
+SELECT * FROM subselect_tbl a
+WHERE a.f1 IN (SELECT b.f2 FROM subselect_tbl b WHERE b.f3 = a.f2 AND a.f1 = 
b.f3 AND b.f3 <> 12)
+; -- Two expressions with correlated variables
+EXPLAIN (COSTS OFF)
+SELECT * FROM subselect_tbl a
+WHERE a.f1 IN (SELECT b.f2 FROM subselect_tbl b WHERE b.f3 = a.f2 AND a.f2 = 
b.f3 AND b.f1 < 12)
+; -- Two expressions with correlated variables relates on one upper variable.
+EXPLAIN (COSTS OFF)
+  SELECT * FROM subselect_tbl a
+  WHERE a.f1 IN (
+    SELECT b.f2 FROM subselect_tbl b WHERE b.f2 = a.f2 AND b.f2 < 12
+    GROUP BY (b.f2)
+  )
+; -- Pull clauses without unnesting the query. XXX: It reduces performance in 
most use cases, doesn't it?
+
+EXPLAIN (COSTS OFF)
+  SELECT * FROM subselect_tbl a
+  WHERE a.f1 IN (
+    SELECT b.f2 FROM (
+      SELECT x AS f1, x+1 AS f2, x+2 AS f3 FROM generate_series(1,10) x
+    ) b WHERE b.f2 = a.f2 AND b.f1 BETWEEN 12 AND 14
+      UNION ALL
+    SELECT c.f1 FROM subselect_tbl c
+    WHERE c.f2 = a.f2 AND c.f1 BETWEEN 12 AND 14
+  )
+; -- Disallow flattening of union all
+EXPLAIN (COSTS OFF)
+  SELECT * FROM subselect_tbl a
+  WHERE a.f1 IN (
+    SELECT b.f1 FROM subselect_tbl b JOIN subselect_tbl c ON (b.f1 = c.f2)
+    WHERE c.f2 = a.f2 AND c.f1 BETWEEN 12 AND 14
+  )
+; -- XXX: Could we flatten such subquery?
+EXPLAIN (COSTS OFF)
+  SELECT * FROM subselect_tbl a
+  WHERE a.f1 IN (
+    SELECT b.f1 FROM subselect_tbl b, subselect_tbl c
+    WHERE b.f1 = c.f2 AND c.f2 = a.f2 AND c.f1 IS NOT NULL
+  )
+; -- TODO: Could we flatten such subquery?
+EXPLAIN (COSTS OFF)
+  SELECT * FROM subselect_tbl a
+  WHERE (a.f1,f2) IN (
+    SELECT b.f2, avg(f3) FROM subselect_tbl b WHERE b.f2 = a.f2 AND b.f2 < 12
+    GROUP BY (b.f2)
+  )
+; -- Doesn't support unnesting with aggregate functions
+EXPLAIN (COSTS OFF)
+  SELECT * FROM subselect_tbl a
+  WHERE a.f1 IN (
+    WITH cte AS (
+      SELECT * FROM subselect_tbl c WHERE f1 < 42 AND f2 = a.f1
+    )
+    SELECT b.f2 FROM cte b WHERE b.f2 = a.f2 AND b.f2 < 12
+  )
+; -- Give up optimization if CTE in subquery contains links to upper relation.
+EXPLAIN (COSTS OFF)
+  SELECT * FROM subselect_tbl a
+  WHERE a.f1 IN (
+    WITH cte AS (
+      SELECT * FROM subselect_tbl c WHERE f1 < 42
+    )
+    SELECT b.f2 FROM cte b WHERE b.f2 = a.f2 AND b.f2 < 12
+  )
+; -- Correlated subquery with trivial CTE can be pulled up
+EXPLAIN (COSTS OFF)
+  SELECT * FROM subselect_tbl a
+  WHERE (a.f1,a.f3) IN (
+    SELECT b.f2, avg(b.f3) OVER (PARTITION BY b.f2)
+    FROM subselect_tbl b WHERE b.f2 = a.f2 AND b.f2 < 12
+  )
+; -- Doesn't support unnesting with window functions in target list
+
+-- A having qual, group clause and so on, with links to upper relation variable
+-- cut off the optimization because another case we must rewrite the subquery
+-- as a lateral TargetEntry and arrange these links.
+-- But now, machinery of convert_ANY_sublink_to_join() isn't prepared for such
+-- complex work and it would induce additional complex code.
+EXPLAIN (COSTS OFF)
+  SELECT * FROM subselect_tbl a
+  WHERE a.f1 IN (
+    SELECT b.f2
+    FROM subselect_tbl b WHERE b.f2 = a.f2 AND b.f2 < 12
+    GROUP BY (b.f2) HAVING b.f2 > a.f3
+  )
+;
+EXPLAIN (COSTS OFF)
+  SELECT * FROM subselect_tbl a
+  WHERE a.f1 IN (
+    SELECT b.f2 FROM (
+      SELECT x AS f1, x+1 AS f2, x+2 AS f3 FROM generate_series(1,a.f1) x
+    ) b WHERE b.f2 = a.f2 AND b.f1 < 12
+  )
+; -- Don't allow links to upper query in FROM section of subquery
+EXPLAIN (COSTS OFF)
+  SELECT * FROM subselect_tbl a
+  WHERE a.f1 IN (
+    SELECT a.f1 FROM subselect_tbl b WHERE b.f2 = a.f2 AND b.f2 < 12
+    GROUP BY (a.f1)
+  )
+; -- GROUP BY contains link to upper relation
+
+-- Flatten subquery with not-correlated clauses. The same result set returned
+EXPLAIN (COSTS OFF) SELECT f1 AS "Correlated Field", f3 AS "Second Field"
+  FROM SUBSELECT_TBL upper
+  WHERE f1 IN
+    (SELECT f2 FROM SUBSELECT_TBL WHERE CAST(upper.f2 AS float) = f3 AND f2 <> 
42);
+SELECT f1 AS "Correlated Field", f3 AS "Second Field"
+  FROM SUBSELECT_TBL upper
+  WHERE f1 IN
+    (SELECT f2 FROM SUBSELECT_TBL WHERE CAST(upper.f2 AS float) = f3 AND f2 <> 
42);
+
 SELECT f1 AS "Correlated Field", f3 AS "Second Field"
   FROM SUBSELECT_TBL upper
   WHERE f3 IN (SELECT upper.f1 + f2 FROM SUBSELECT_TBL
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 97c9bc1861..4803e7a978 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -470,6 +470,7 @@ CopySource
 CopyStmt
 CopyToState
 CopyToStateData
+correlated_t
 Cost
 CostSelector
 Counters
-- 
2.37.3

Reply via email to