On 10/31/2013 09:37 PM, Antonin Houska wrote:
> On 10/31/2013 03:46 PM, Antonin Houska wrote:
> I'm not sure if it's legal for the WHERE clause to reference LHS of the
> original outer join (a.j). Some more restriction may be needed. I need
> to think about it a bit more.

For a subquery or sublink expression referencing the outer table of an
OJ (see tab1)

SELECT *
FROM    tab1 a
        LEFT JOIN
        tab2 b
        ON a.i = ANY (
                SELECT  k
                FROM    tab3 c
                WHERE   k = a.i);

I started my considerations by inserting the SEMI JOIN in a form of
subquery, instead of a join node - see SJ_subquery here:

SELECT  *
FROM    tab1 a
        LEFT JOIN
        (
           SELECT *
           tab2 b
           SEMI JOIN
           (  SELECT  k
              FROM    tab3 c
              WHERE   k = a.i
           ) AS ANY_subquery
           ON a.i = ANY_subquery.k
        ) AS SJ_subquery
        ON true;

(To allow a.i in the sublink expression, we'd only need to pass both
tab1 and tab2 to pull_up_sublinks_qual_recurse() in available_rels1.)

However it seem to be these lateral references (from the subquery and/or
the sublink expression) to tab1 that make it impossible for
SJ_subquery to be pulled up into the parent query's join tree - see
jointree_contains_lateral_outer_refs(). I'm not sure if it makes much
sense to pull up the sublink in such a case, does it?

I ended up with this logic: if the join is INNER, both the subquery and
sublink expression can reference either side. If the join is OUTER, only
the inner side can be referenced. Otherwise no attempt to introduce the
SEMI JOIN.

Can this be considered a patch, or is it wrong/incomplete?

// Antonin Houska (Tony)



diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index d8cabbd..6095008 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -1167,6 +1167,7 @@ convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink,
 	JoinExpr   *result;
 	Query	   *parse = root->parse;
 	Query	   *subselect = (Query *) sublink->subselect;
+	bool	subselectLateral;
 	Relids		upper_varnos;
 	int			rtindex;
 	RangeTblEntry *rte;
@@ -1177,11 +1178,10 @@ convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink,
 	Assert(sublink->subLinkType == ANY_SUBLINK);
 
 	/*
-	 * The sub-select must not refer to any Vars of the parent query. (Vars of
-	 * higher levels should be okay, though.)
+	 * If the subselect refers to any Vars of the parent query, it must be
+	 * marked LATERAL.
 	 */
-	if (contain_vars_of_level((Node *) subselect, 1))
-		return NULL;
+	subselectLateral = contain_vars_of_level((Node *) subselect, 1);
 
 	/*
 	 * The test expression must contain some Vars of the parent query, else
@@ -1199,6 +1199,20 @@ convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink,
 		return NULL;
 
 	/*
+	 * Neither the subselect can refer to outside available_rels.
+	 * (Such lateral references would prevent the resulting subquery from
+	 * being reached by pull_up_simple_subquery.)
+	 */
+	if (subselectLateral)
+	{
+		Relids sub_varnos = pull_varnos_of_level((Node *) subselect, 1);
+
+		if (!bms_is_subset(sub_varnos, available_rels))
+			return NULL;
+	}
+
+
+	/*
 	 * The combining operators and left-hand expressions mustn't be volatile.
 	 */
 	if (contain_volatile_functions(sublink->testexpr))
@@ -1215,7 +1229,7 @@ convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink,
 	rte = addRangeTableEntryForSubquery(NULL,
 										subselect,
 										makeAlias("ANY_subquery", NIL),
-										false,
+										subselectLateral,
 										false);
 	parse->rtable = lappend(parse->rtable, rte);
 	rtindex = list_length(parse->rtable);
CREATE TABLE tab1(i int);
CREATE TABLE tab2(j int);
CREATE TABLE tab3(k int);
EXPLAIN
SELECT *
FROM	tab1 a
	JOIN
	tab2 b
	ON b.j = ANY (
		SELECT  k
		FROM	tab3
		WHERE k = b.j);

EXPLAIN
SELECT *
FROM	tab1 a
	JOIN
	tab2 b
	-- Available_rels1 contains both 'a' and 'b'.
	-- Both sublink expression and the subselect can refer to either side
	-- of the JOIN.
	ON a.i = ANY (
		SELECT  k
		FROM	tab3
		WHERE k = a.i);

EXPLAIN
SELECT *
FROM	tab1 a
	LEFT JOIN
	tab2 b
	ON b.j = ANY (
		SELECT  k
		FROM	tab3
		WHERE k = b.j);

EXPLAIN
SELECT *
FROM	tab1 a
	LEFT JOIN
	tab2 b
	ON b.j = ANY (
		SELECT  k
		FROM	tab3
		-- Lateral reference to 'a', no sublink pull-up
	 	WHERE k = a.i); 

EXPLAIN
SELECT *
FROM	tab1 a
	LEFT JOIN
	tab2 b
	-- Lateral reference to 'a', no sublink pull-up
	ON a.i = ANY (
		SELECT  k
		FROM	tab3
	 	WHERE k = b.j); 
                                QUERY PLAN                                
--------------------------------------------------------------------------
 Nested Loop  (cost=0.00..84113.00 rows=2880000 width=8)
   ->  Seq Scan on tab1 a  (cost=0.00..34.00 rows=2400 width=4)
   ->  Materialize  (cost=0.00..48082.00 rows=1200 width=4)
         ->  Seq Scan on tab2 b  (cost=0.00..48076.00 rows=1200 width=4)
               Filter: (SubPlan 1)
               SubPlan 1
                 ->  Seq Scan on tab3  (cost=0.00..40.00 rows=12 width=4)
                       Filter: (k = b.j)
(8 rows)

                                QUERY PLAN                                
--------------------------------------------------------------------------
 Nested Loop  (cost=0.00..84113.00 rows=2880000 width=8)
   ->  Seq Scan on tab2 b  (cost=0.00..34.00 rows=2400 width=4)
   ->  Materialize  (cost=0.00..48082.00 rows=1200 width=4)
         ->  Seq Scan on tab1 a  (cost=0.00..48076.00 rows=1200 width=4)
               Filter: (SubPlan 1)
               SubPlan 1
                 ->  Seq Scan on tab3  (cost=0.00..40.00 rows=12 width=4)
                       Filter: (k = a.i)
(8 rows)

                                QUERY PLAN                                
--------------------------------------------------------------------------
 Nested Loop Left Join  (cost=0.00..84113.00 rows=2880000 width=8)
   ->  Seq Scan on tab1 a  (cost=0.00..34.00 rows=2400 width=4)
   ->  Materialize  (cost=0.00..48082.00 rows=1200 width=4)
         ->  Seq Scan on tab2 b  (cost=0.00..48076.00 rows=1200 width=4)
               Filter: (SubPlan 1)
               SubPlan 1
                 ->  Seq Scan on tab3  (cost=0.00..40.00 rows=12 width=4)
                       Filter: (k = b.j)
(8 rows)

                              QUERY PLAN                               
-----------------------------------------------------------------------
 Nested Loop Left Join  (cost=0.00..115372874.00 rows=2880000 width=8)
   Join Filter: (SubPlan 1)
   ->  Seq Scan on tab1 a  (cost=0.00..34.00 rows=2400 width=4)
   ->  Materialize  (cost=0.00..46.00 rows=2400 width=4)
         ->  Seq Scan on tab2 b  (cost=0.00..34.00 rows=2400 width=4)
   SubPlan 1
     ->  Seq Scan on tab3  (cost=0.00..40.00 rows=12 width=4)
           Filter: (k = a.i)
(8 rows)

                              QUERY PLAN                               
-----------------------------------------------------------------------
 Nested Loop Left Join  (cost=0.00..115372874.00 rows=2880000 width=8)
   Join Filter: (SubPlan 1)
   ->  Seq Scan on tab1 a  (cost=0.00..34.00 rows=2400 width=4)
   ->  Materialize  (cost=0.00..46.00 rows=2400 width=4)
         ->  Seq Scan on tab2 b  (cost=0.00..34.00 rows=2400 width=4)
   SubPlan 1
     ->  Seq Scan on tab3  (cost=0.00..40.00 rows=12 width=4)
           Filter: (k = b.j)
(8 rows)

                                    QUERY PLAN                                  
  
----------------------------------------------------------------------------------
 Merge Join  (cost=6385.49..6829.49 rows=2880000 width=8)
   Merge Cond: (tab3.k = b.j)
   ->  Sort  (cost=6216.75..6222.75 rows=2400 width=8)
         Sort Key: tab3.k
         ->  Nested Loop  (cost=40.00..6082.00 rows=2400 width=8)
               ->  HashAggregate  (cost=40.00..42.00 rows=200 width=4)
                     ->  Seq Scan on tab3  (cost=0.00..34.00 rows=2400 width=4)
               ->  Materialize  (cost=0.00..46.00 rows=2400 width=4)
                     ->  Seq Scan on tab1 a  (cost=0.00..34.00 rows=2400 
width=4)
   ->  Sort  (cost=168.75..174.75 rows=2400 width=4)
         Sort Key: b.j
         ->  Seq Scan on tab2 b  (cost=0.00..34.00 rows=2400 width=4)
(12 rows)

                                    QUERY PLAN                                  
  
----------------------------------------------------------------------------------
 Merge Join  (cost=6385.49..6829.49 rows=2880000 width=8)
   Merge Cond: (tab3.k = a.i)
   ->  Sort  (cost=6216.75..6222.75 rows=2400 width=8)
         Sort Key: tab3.k
         ->  Nested Loop  (cost=40.00..6082.00 rows=2400 width=8)
               ->  HashAggregate  (cost=40.00..42.00 rows=200 width=4)
                     ->  Seq Scan on tab3  (cost=0.00..34.00 rows=2400 width=4)
               ->  Materialize  (cost=0.00..46.00 rows=2400 width=4)
                     ->  Seq Scan on tab2 b  (cost=0.00..34.00 rows=2400 
width=4)
   ->  Sort  (cost=168.75..174.75 rows=2400 width=4)
         Sort Key: a.i
         ->  Seq Scan on tab1 a  (cost=0.00..34.00 rows=2400 width=4)
(12 rows)

                                      QUERY PLAN                                
      
--------------------------------------------------------------------------------------
 Nested Loop Left Join  (cost=44.50..36148.50 rows=2880000 width=8)
   ->  Seq Scan on tab1 a  (cost=0.00..34.00 rows=2400 width=4)
   ->  Materialize  (cost=44.50..117.50 rows=1200 width=4)
         ->  Hash Join  (cost=44.50..111.50 rows=1200 width=4)
               Hash Cond: (b.j = tab3.k)
               ->  Seq Scan on tab2 b  (cost=0.00..34.00 rows=2400 width=4)
               ->  Hash  (cost=42.00..42.00 rows=200 width=4)
                     ->  HashAggregate  (cost=40.00..42.00 rows=200 width=4)
                           ->  Seq Scan on tab3  (cost=0.00..34.00 rows=2400 
width=4)
(9 rows)

                              QUERY PLAN                               
-----------------------------------------------------------------------
 Nested Loop Left Join  (cost=0.00..115372874.00 rows=2880000 width=8)
   Join Filter: (SubPlan 1)
   ->  Seq Scan on tab1 a  (cost=0.00..34.00 rows=2400 width=4)
   ->  Materialize  (cost=0.00..46.00 rows=2400 width=4)
         ->  Seq Scan on tab2 b  (cost=0.00..34.00 rows=2400 width=4)
   SubPlan 1
     ->  Seq Scan on tab3  (cost=0.00..40.00 rows=12 width=4)
           Filter: (k = a.i)
(8 rows)

                              QUERY PLAN                               
-----------------------------------------------------------------------
 Nested Loop Left Join  (cost=0.00..115372874.00 rows=2880000 width=8)
   Join Filter: (SubPlan 1)
   ->  Seq Scan on tab1 a  (cost=0.00..34.00 rows=2400 width=4)
   ->  Materialize  (cost=0.00..46.00 rows=2400 width=4)
         ->  Seq Scan on tab2 b  (cost=0.00..34.00 rows=2400 width=4)
   SubPlan 1
     ->  Seq Scan on tab3  (cost=0.00..40.00 rows=12 width=4)
           Filter: (k = b.j)
(8 rows)

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to