diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c
index 129fc3d..bc1929c 100644
--- a/src/backend/optimizer/plan/analyzejoins.c
+++ b/src/backend/optimizer/plan/analyzejoins.c
@@ -22,6 +22,8 @@
  */
 #include "postgres.h"
 
+#include "nodes/nodeFuncs.h"
+#include "optimizer/clauses.h"
 #include "optimizer/joininfo.h"
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
@@ -153,13 +155,26 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
 	int			attroff;
 
 	/*
-	 * Currently, we only know how to remove left joins to a baserel with
-	 * unique indexes.  We can check most of these criteria pretty trivially
-	 * to avoid doing useless extra work.  But checking whether any of the
-	 * indexes are unique would require iterating over the indexlist, so for
-	 * now we just make sure there are indexes of some sort or other.  If none
-	 * of them are unique, join removal will still fail, just slightly later.
+	 * Assuming none of the variables from the join are needed by the query,
+	 * it is possible here to remove a left join providing we can determine
+	 * that the join will never produce more than 1 row that matches the join
+	 * condition.
+	 *
+	 * There are a few ways that we can do this:
+	 *
+	 * 1. When joining to a baserel we can check if a unique index exists
+	 *    where all of the columns of the index are seen in the join condition
+	 *    with equality operators.
+	 *
+	 * 2. When joining to a subquery we can check if the subquery contains a
+	 *    GROUP BY or DISTINCT clause where all of the columns of the clause
+	 *    appear in the join condition with equality operators.
+	 *
+	 * The code below is written with the assumption that join removal is more
+	 * likely not to happen, for this reason we try to fast path out of this
+	 * function early when possible.
 	 */
+
 	if (sjinfo->jointype != JOIN_LEFT ||
 		sjinfo->delay_upper_joins ||
 		bms_membership(sjinfo->min_righthand) != BMS_SINGLETON)
@@ -168,11 +183,21 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
 	innerrelid = bms_singleton_member(sjinfo->min_righthand);
 	innerrel = find_base_rel(root, innerrelid);
 
-	if (innerrel->reloptkind != RELOPT_BASEREL ||
-		innerrel->rtekind != RTE_RELATION ||
-		innerrel->indexlist == NIL)
+	if (innerrel->reloptkind != RELOPT_BASEREL)
 		return false;
 
+	if (innerrel->rtekind == RTE_RELATION)
+	{
+		/*
+		 * If there are no indexes then there's certainly no unique indexes
+		 * so there's no need to go any further.
+		 */
+		if (innerrel->indexlist == NIL)
+			return false;
+	}
+	else if (innerrel->rtekind != RTE_SUBQUERY)
+		return false; /* unsupported rtekind */
+
 	/* Compute the relid set for the join we are considering */
 	joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
 
@@ -276,17 +301,83 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
 	 */
 
 	/* Now examine the indexes to see if we have a matching unique index */
-	if (relation_has_unique_index_for(root, innerrel, clause_list, NIL, NIL))
+	if (innerrel->rtekind == RTE_RELATION &&
+		relation_has_unique_index_for(root, innerrel, clause_list, NIL, NIL))
 		return true;
 
 	/*
+	 * For subqueries we should be able to remove the join if the subquery
+	 * can't produce more than 1 record which matches the outer query on the
+	 * join condition. However, there's a few pre-conditions that the subquery
+	 * must meet for it to be safe to remove:
+	 *
+	 * 1. The subquery mustn't contain a FOR UPDATE clause. Removing such a
+	 *    join would have the undesired side affect of not locking the rows.
+	 *
+	 * 2. The subquery mustn't contain any volatile functions. Removing such
+	 *    a join would cause side affects that the volatile functions may have,
+	 *    not to occur.
+	 *
+	 * 3. The subquery mustn't contain any set returning functions. These can
+	 *    cause duplicate records despite the existence of a DISTINCT or
+	 *    GROUP BY clause which could otherwise make the subquery unique.
+	 */
+	if (innerrel->rtekind == RTE_SUBQUERY)
+	{
+		List	*colnos;
+		List	*opids;
+		Query	*subquery = root->simple_rte_array[innerrelid]->subquery;
+
+		/* check point 1 */
+		if (subquery->hasForUpdate)
+			return false;
+
+		/* check point 2 */
+		if (contain_volatile_functions((Node *) subquery->targetList))
+			return false;
+
+		/* check point 3 */
+		if (expression_returns_set((Node *) subquery->targetList))
+			return false;
+
+		colnos = NULL;
+		opids = NULL;
+
+		/*
+		 * Build a list of varattnos that we require the subquery to be unique over.
+		 * We also build a list of the operators that are used with these vars in the
+		 * join condition so that query_is_distinct_for can check that these
+		 * operators are compatible with the GROUP BY or DISTINCT clause in the
+		 * subquery.
+		 */
+		foreach(l, clause_list)
+		{
+			RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
+			Var			 *var;
+
+			if (rinfo->outer_is_left)
+				var = (Var *) get_rightop(rinfo->clause);
+			else
+				var = (Var *) get_leftop(rinfo->clause);
+
+			if (!var || !IsA(var, Var) ||
+				var->varno != innerrelid)
+				continue;
+
+			colnos = lappend_int(colnos, var->varattno);
+			opids = lappend_oid(opids, rinfo->hashjoinoperator);
+		}
+
+		return query_is_distinct_for(subquery, colnos, opids);
+	}
+
+	/*
 	 * Some day it would be nice to check for other methods of establishing
 	 * distinctness.
 	 */
 	return false;
 }
 
-
 /*
  * Remove the target relid from the planner's data structures, having
  * determined that there is no need to include it in the query.
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 4e05dcd..f701954 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -38,7 +38,6 @@ typedef enum
 } PathCostComparison;
 
 static List *translate_sub_tlist(List *tlist, int relid);
-static bool query_is_distinct_for(Query *query, List *colnos, List *opids);
 static Oid	distinct_col_search(int colno, List *colnos, List *opids);
 
 
@@ -1465,7 +1464,7 @@ translate_sub_tlist(List *tlist, int relid)
  * should give trustworthy answers for all operators that we might need
  * to deal with here.)
  */
-static bool
+bool
 query_is_distinct_for(Query *query, List *colnos, List *opids)
 {
 	ListCell   *l;
@@ -1486,6 +1485,13 @@ query_is_distinct_for(Query *query, List *colnos, List *opids)
 			TargetEntry *tle = get_sortgroupclause_tle(sgc,
 													   query->targetList);
 
+			/*
+			 * We can ignore constants since they have only one value and don't
+			 * affect uniqueness of results.
+			 */
+			if (IsA(tle->expr, Const))
+				continue;
+
 			opid = distinct_col_search(tle->resno, colnos, opids);
 			if (!OidIsValid(opid) ||
 				!equality_ops_are_compatible(opid, sgc->eqop))
@@ -1507,6 +1513,13 @@ query_is_distinct_for(Query *query, List *colnos, List *opids)
 			TargetEntry *tle = get_sortgroupclause_tle(sgc,
 													   query->targetList);
 
+			/*
+			 * We can ignore constants since they have only one value and don't
+			 * affect uniqueness of results.
+			 */
+			if (IsA(tle->expr, Const))
+				continue;
+
 			opid = distinct_col_search(tle->resno, colnos, opids);
 			if (!OidIsValid(opid) ||
 				!equality_ops_are_compatible(opid, sgc->eqop))
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h
index a0bcc82..2f571f5 100644
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -67,6 +67,7 @@ extern ResultPath *create_result_path(List *quals);
 extern MaterialPath *create_material_path(RelOptInfo *rel, Path *subpath);
 extern UniquePath *create_unique_path(PlannerInfo *root, RelOptInfo *rel,
 				   Path *subpath, SpecialJoinInfo *sjinfo);
+extern bool query_is_distinct_for(Query *query, List *colnos, List *opids);
 extern Path *create_subqueryscan_path(PlannerInfo *root, RelOptInfo *rel,
 						 List *pathkeys, Relids required_outer);
 extern Path *create_functionscan_path(PlannerInfo *root, RelOptInfo *rel,
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index c62a63f..4959e5f 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -3131,9 +3131,11 @@ begin;
 CREATE TEMP TABLE a (id int PRIMARY KEY, b_id int);
 CREATE TEMP TABLE b (id int PRIMARY KEY, c_id int);
 CREATE TEMP TABLE c (id int PRIMARY KEY);
+CREATE TEMP TABLE d (a INT, b INT);
 INSERT INTO a VALUES (0, 0), (1, NULL);
 INSERT INTO b VALUES (0, 0), (1, NULL);
 INSERT INTO c VALUES (0), (1);
+INSERT INTO d VALUES (1,3),(2,2),(3,1);
 -- all three cases should be optimizable into a simple seqscan
 explain (costs off) SELECT a.* FROM a LEFT JOIN b ON a.b_id = b.id;
   QUERY PLAN   
@@ -3169,6 +3171,161 @@ select id from a where id in (
          ->  Seq Scan on b
 (5 rows)
 
+-- check that join removal works for a left join when joining a subquery
+-- that is guaranteed to be unique on the join condition by the GROUP BY clause
+EXPLAIN (COSTS OFF)
+SELECT a.id FROM a LEFT JOIN (SELECT b.id FROM b GROUP BY b.id) b ON a.b_id = b.id;
+  QUERY PLAN   
+---------------
+ Seq Scan on a
+(1 row)
+
+-- check that join removal works for a left join when joining a subquery
+-- that is guaranteed to be unique on the join condition by the GROUP BY clause
+-- which contains more than 1 column.
+EXPLAIN (COSTS OFF)
+SELECT a.id FROM a LEFT JOIN (SELECT b.id,b.c_id FROM b GROUP BY b.id,b.c_id) b ON a.b_id = b.id AND a.id = b.c_id;
+  QUERY PLAN   
+---------------
+ Seq Scan on a
+(1 row)
+
+-- check that join removal works for a left join when joining a subquery
+-- where the join condition is a superset of the columns in the GROUP BY.
+EXPLAIN (COSTS OFF)
+SELECT a.id FROM a
+LEFT JOIN (SELECT b.id,c_id FROM b GROUP BY b.id) b ON a.id = b.id AND b.c_id = 1;
+  QUERY PLAN   
+---------------
+ Seq Scan on a
+(1 row)
+
+-- check that join removal works for a left join when joining a subquery
+-- that is guaranteed to be unique on the join condition by the DISTINCT clause
+EXPLAIN (COSTS OFF)
+SELECT a.id FROM a LEFT JOIN (SELECT DISTINCT a+b AS ab FROM d) d ON a.id = d.ab;
+  QUERY PLAN   
+---------------
+ Seq Scan on a
+(1 row)
+
+-- join removal is not possible when distinct contains a volatile function
+EXPLAIN (COSTS OFF)
+SELECT a.id FROM a LEFT JOIN (SELECT DISTINCT a+b+random() AS abr FROM d) d ON a.id = d.abr;
+                                        QUERY PLAN                                        
+------------------------------------------------------------------------------------------
+ Hash Left Join
+   Hash Cond: ((a.id)::double precision = ((((d.a + d.b))::double precision + random())))
+   ->  Seq Scan on a
+   ->  Hash
+         ->  HashAggregate
+               Group Key: (((d.a + d.b))::double precision + random())
+               ->  Seq Scan on d
+(7 rows)
+
+-- check that join removal works for a left join when joining a subquery that
+-- is guaranteed to be unique on the join condition even if it contains a Const.
+EXPLAIN (COSTS OFF)
+SELECT a.id FROM a LEFT JOIN (SELECT DISTINCT b.c_id,1 AS dummy FROM b) b ON a.id = b.c_id;
+  QUERY PLAN   
+---------------
+ Seq Scan on a
+(1 row)
+
+-- check join removal works when joining to a subquery that is guaranteed to be
+-- unique on the join condition even when the subquery itself involves a join. 
+EXPLAIN (COSTS OFF)
+SELECT a.id FROM a LEFT JOIN (SELECT b.id FROM b INNER JOIN c ON b.id = c.id GROUP BY b.id) b ON a.id + 10 = b.id;
+  QUERY PLAN   
+---------------
+ Seq Scan on a
+(1 row)
+
+-- check join removal works with a left join when joining a unique subquery which
+-- contains 2 tables where the uniqueness enforced by the GROUP BY clause is a
+-- subset of the join condition.
+EXPLAIN (COSTS OFF)
+SELECT a.id FROM a
+LEFT JOIN (SELECT b.id,1 as dummy FROM b INNER JOIN c ON b.id = c.id GROUP BY b.id) b ON a.id = b.id AND b.dummy = 1;
+  QUERY PLAN   
+---------------
+ Seq Scan on a
+(1 row)
+
+-- join removal is not possible when the GROUP BY contains a column which is
+-- not in the join condition.
+EXPLAIN (COSTS OFF)
+SELECT a.id FROM a LEFT JOIN (SELECT b.id FROM b GROUP BY b.id,b.c_id) b ON a.id = b.id;
+           QUERY PLAN            
+---------------------------------
+ Hash Right Join
+   Hash Cond: (b.id = a.id)
+   ->  HashAggregate
+         Group Key: b.id, b.c_id
+         ->  Seq Scan on b
+   ->  Hash
+         ->  Seq Scan on a
+(7 rows)
+
+-- join removal is not possible when DISTINCT clause contains a column which is
+-- not in the join condition.
+EXPLAIN (COSTS OFF)
+SELECT a.id FROM a LEFT JOIN (SELECT DISTINCT b.id,c_id FROM b) b ON a.id = b.id;
+           QUERY PLAN            
+---------------------------------
+ Hash Right Join
+   Hash Cond: (b.id = a.id)
+   ->  HashAggregate
+         Group Key: b.id, b.c_id
+         ->  Seq Scan on b
+   ->  Hash
+         ->  Seq Scan on a
+(7 rows)
+
+-- join removal is not possible when DISTINCT contains a volatile function
+EXPLAIN (COSTS OFF)
+SELECT a.id FROM a LEFT JOIN (SELECT DISTINCT b.id,random() AS r FROM b) b ON a.id = b.id AND r = random();
+                   QUERY PLAN                    
+-------------------------------------------------
+ Hash Left Join
+   Hash Cond: (a.id = b.id)
+   ->  Seq Scan on a
+   ->  Hash
+         ->  Subquery Scan on b
+               Filter: (b.r = random())
+               ->  HashAggregate
+                     Group Key: b_1.id, random()
+                     ->  Seq Scan on b b_1
+(9 rows)
+
+-- join removal is not possible when there are any volatile functions in the target list.
+EXPLAIN (COSTS OFF)
+SELECT a.id FROM a LEFT JOIN (SELECT id,AVG(c_id),SUM(random()) FROM b GROUP BY id) b ON a.id = b.id;
+         QUERY PLAN         
+----------------------------
+ Hash Right Join
+   Hash Cond: (b.id = a.id)
+   ->  HashAggregate
+         Group Key: b.id
+         ->  Seq Scan on b
+   ->  Hash
+         ->  Seq Scan on a
+(7 rows)
+
+-- join removal is not possible when there are set returning functions in the target list.
+EXPLAIN (COSTS OFF)
+SELECT a.id FROM a LEFT JOIN (SELECT id,generate_series(1,2) FROM b GROUP BY id) b ON a.id = b.id;
+         QUERY PLAN         
+----------------------------
+ Hash Right Join
+   Hash Cond: (b.id = a.id)
+   ->  HashAggregate
+         Group Key: b.id
+         ->  Seq Scan on b
+   ->  Hash
+         ->  Seq Scan on a
+(7 rows)
+
 rollback;
 create temp table parent (k int primary key, pd int);
 create temp table child (k int unique, cd int);
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index 1031f26..21e29d2 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -919,9 +919,11 @@ begin;
 CREATE TEMP TABLE a (id int PRIMARY KEY, b_id int);
 CREATE TEMP TABLE b (id int PRIMARY KEY, c_id int);
 CREATE TEMP TABLE c (id int PRIMARY KEY);
+CREATE TEMP TABLE d (a INT, b INT);
 INSERT INTO a VALUES (0, 0), (1, NULL);
 INSERT INTO b VALUES (0, 0), (1, NULL);
 INSERT INTO c VALUES (0), (1);
+INSERT INTO d VALUES (1,3),(2,2),(3,1);
 
 -- all three cases should be optimizable into a simple seqscan
 explain (costs off) SELECT a.* FROM a LEFT JOIN b ON a.b_id = b.id;
@@ -936,6 +938,71 @@ select id from a where id in (
 	select b.id from b left join c on b.id = c.id
 );
 
+-- check that join removal works for a left join when joining a subquery
+-- that is guaranteed to be unique on the join condition by the GROUP BY clause
+EXPLAIN (COSTS OFF)
+SELECT a.id FROM a LEFT JOIN (SELECT b.id FROM b GROUP BY b.id) b ON a.b_id = b.id;
+
+-- check that join removal works for a left join when joining a subquery
+-- that is guaranteed to be unique on the join condition by the GROUP BY clause
+-- which contains more than 1 column.
+EXPLAIN (COSTS OFF)
+SELECT a.id FROM a LEFT JOIN (SELECT b.id,b.c_id FROM b GROUP BY b.id,b.c_id) b ON a.b_id = b.id AND a.id = b.c_id;
+
+-- check that join removal works for a left join when joining a subquery
+-- where the join condition is a superset of the columns in the GROUP BY.
+EXPLAIN (COSTS OFF)
+SELECT a.id FROM a
+LEFT JOIN (SELECT b.id,c_id FROM b GROUP BY b.id) b ON a.id = b.id AND b.c_id = 1;
+
+-- check that join removal works for a left join when joining a subquery
+-- that is guaranteed to be unique on the join condition by the DISTINCT clause
+EXPLAIN (COSTS OFF)
+SELECT a.id FROM a LEFT JOIN (SELECT DISTINCT a+b AS ab FROM d) d ON a.id = d.ab;
+
+-- join removal is not possible when distinct contains a volatile function
+EXPLAIN (COSTS OFF)
+SELECT a.id FROM a LEFT JOIN (SELECT DISTINCT a+b+random() AS abr FROM d) d ON a.id = d.abr;
+
+-- check that join removal works for a left join when joining a subquery that
+-- is guaranteed to be unique on the join condition even if it contains a Const.
+EXPLAIN (COSTS OFF)
+SELECT a.id FROM a LEFT JOIN (SELECT DISTINCT b.c_id,1 AS dummy FROM b) b ON a.id = b.c_id;
+
+-- check join removal works when joining to a subquery that is guaranteed to be
+-- unique on the join condition even when the subquery itself involves a join.
+EXPLAIN (COSTS OFF)
+SELECT a.id FROM a LEFT JOIN (SELECT b.id FROM b INNER JOIN c ON b.id = c.id GROUP BY b.id) b ON a.id + 10 = b.id;
+
+-- check join removal works with a left join when joining a unique subquery which
+-- contains 2 tables where the uniqueness enforced by the GROUP BY clause is a
+-- subset of the join condition.
+EXPLAIN (COSTS OFF)
+SELECT a.id FROM a
+LEFT JOIN (SELECT b.id,1 as dummy FROM b INNER JOIN c ON b.id = c.id GROUP BY b.id) b ON a.id = b.id AND b.dummy = 1;
+
+-- join removal is not possible when the GROUP BY contains a column which is
+-- not in the join condition.
+EXPLAIN (COSTS OFF)
+SELECT a.id FROM a LEFT JOIN (SELECT b.id FROM b GROUP BY b.id,b.c_id) b ON a.id = b.id;
+
+-- join removal is not possible when DISTINCT clause contains a column which is
+-- not in the join condition.
+EXPLAIN (COSTS OFF)
+SELECT a.id FROM a LEFT JOIN (SELECT DISTINCT b.id,c_id FROM b) b ON a.id = b.id;
+
+-- join removal is not possible when DISTINCT contains a volatile function
+EXPLAIN (COSTS OFF)
+SELECT a.id FROM a LEFT JOIN (SELECT DISTINCT b.id,random() AS r FROM b) b ON a.id = b.id AND r = random();
+
+-- join removal is not possible when there are any volatile functions in the target list.
+EXPLAIN (COSTS OFF)
+SELECT a.id FROM a LEFT JOIN (SELECT id,AVG(c_id),SUM(random()) FROM b GROUP BY id) b ON a.id = b.id;
+
+-- join removal is not possible when there are set returning functions in the target list.
+EXPLAIN (COSTS OFF)
+SELECT a.id FROM a LEFT JOIN (SELECT id,generate_series(1,2) FROM b GROUP BY id) b ON a.id = b.id;
+
 rollback;
 
 create temp table parent (k int primary key, pd int);