From c05e93afb56befb567d8a95b090db453264d3b9f Mon Sep 17 00:00:00 2001
From: David Rowley <dgrowley@gmail.com>
Date: Thu, 18 May 2017 20:10:05 +1200
Subject: [PATCH 1/2] Minimal fix for foreign key join estimations

---
 src/backend/optimizer/path/costsize.c | 95 +++++++++--------------------------
 src/test/regress/expected/join.out    | 32 ++++++++++++
 src/test/regress/sql/join.sql         | 25 +++++++++
 3 files changed, 82 insertions(+), 70 deletions(-)

diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index cdb18d9..d7bdd41 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -4303,12 +4303,21 @@ get_foreign_key_join_selectivity(PlannerInfo *root,
 	List	   *worklist = *restrictlist;
 	ListCell   *lc;
 
+	/*
+	 * When the inner side of the semi/anti join is itself a join, it's hard
+	 * to guess what fraction of the referenced table will get through the
+	 * join.  Let's just reject this case outright for now.
+	 */
+	if ((jointype == JOIN_SEMI || jointype == JOIN_ANTI) &&
+		bms_membership(inner_relids) != BMS_SINGLETON)
+		return 1.0;
+
 	/* Consider each FK constraint that is known to match the query */
 	foreach(lc, root->fkey_list)
 	{
 		ForeignKeyOptInfo *fkinfo = (ForeignKeyOptInfo *) lfirst(lc);
+		RelOptInfo *ref_rel;
 		bool		ref_is_outer;
-		bool		use_smallest_selectivity = false;
 		List	   *removedlist;
 		ListCell   *cell;
 		ListCell   *prev;
@@ -4429,10 +4438,6 @@ get_foreign_key_join_selectivity(PlannerInfo *root,
 		 * be double-counting the null fraction, and (2) it's not very clear
 		 * how to combine null fractions for multiple referencing columns.
 		 *
-		 * In the use_smallest_selectivity code below, null derating is done
-		 * implicitly by relying on clause_selectivity(); in the other cases,
-		 * we do nothing for now about correcting for nulls.
-		 *
 		 * XXX another point here is that if either side of an FK constraint
 		 * is an inheritance parent, we estimate as though the constraint
 		 * covers all its children as well.  This is not an unreasonable
@@ -4443,52 +4448,25 @@ get_foreign_key_join_selectivity(PlannerInfo *root,
 		 * work, it is uncommon in practice to have an FK referencing a parent
 		 * table.  So, at least for now, disregard inheritance here.
 		 */
-		if (ref_is_outer && jointype != JOIN_INNER)
-		{
-			/*
-			 * When the referenced table is on the outer side of a non-inner
-			 * join, knowing that each inner row has exactly one match is not
-			 * as useful as one could wish, since we really need to know the
-			 * fraction of outer rows with a match.  Still, we can avoid the
-			 * folly of multiplying the per-column estimates together.  Take
-			 * the smallest per-column selectivity, instead.  (This should
-			 * correspond to the FK column with the most nulls.)
-			 */
-			use_smallest_selectivity = true;
-		}
-		else if (jointype == JOIN_SEMI || jointype == JOIN_ANTI)
+
+		ref_rel = find_base_rel(root, fkinfo->ref_relid);
+
+		if (!ref_is_outer && (jointype == JOIN_SEMI || jointype == JOIN_ANTI))
 		{
 			/*
 			 * For JOIN_SEMI and JOIN_ANTI, the selectivity is defined as the
-			 * fraction of LHS rows that have matches.  The referenced table
-			 * is on the inner side (we already handled the other case above),
-			 * so the FK implies that every LHS row has a match *in the
+			 * fraction of LHS rows that have matches.  Here we handle the
+			 * case where the referenced table is on the inner side,* so the
+			 * FK implies that every LHS row has a match *in the
 			 * referenced table*.  But any restriction or join clauses below
-			 * here will reduce the number of matches.
+			 * here will reduce the number of matches.  It's possible that
+			 * we'll never hit this case for a SEMI join, as surely unique
+			 * proofs were found on the inner side of the SEMI join which
+			 * allowed it to be converted into an INNER join.  Nevertheless,
+			 * we'd better just handle this case properly in case it does
+			 * arise.
 			 */
-			if (bms_membership(inner_relids) == BMS_SINGLETON)
-			{
-				/*
-				 * When the inner side of the semi/anti join is just the
-				 * referenced table, we may take the FK selectivity as equal
-				 * to the selectivity of the table's restriction clauses.
-				 */
-				RelOptInfo *ref_rel = find_base_rel(root, fkinfo->ref_relid);
-				double		ref_tuples = Max(ref_rel->tuples, 1.0);
-
-				fkselec *= ref_rel->rows / ref_tuples;
-			}
-			else
-			{
-				/*
-				 * When the inner side of the semi/anti join is itself a join,
-				 * it's hard to guess what fraction of the referenced table
-				 * will get through the join.  But we still don't want to
-				 * multiply per-column estimates together.  Take the smallest
-				 * per-column selectivity, instead.
-				 */
-				use_smallest_selectivity = true;
-			}
+			fkselec *= ref_rel->rows / Max(ref_rel->tuples, 1.0);
 		}
 		else
 		{
@@ -4497,30 +4475,7 @@ get_foreign_key_join_selectivity(PlannerInfo *root,
 			 * guard against tuples == 0.  Note we should use the raw table
 			 * tuple count, not any estimate of its filtered or joined size.
 			 */
-			RelOptInfo *ref_rel = find_base_rel(root, fkinfo->ref_relid);
-			double		ref_tuples = Max(ref_rel->tuples, 1.0);
-
-			fkselec *= 1.0 / ref_tuples;
-		}
-
-		/*
-		 * Common code for cases where we should use the smallest selectivity
-		 * that would be computed for any one of the FK's clauses.
-		 */
-		if (use_smallest_selectivity)
-		{
-			Selectivity thisfksel = 1.0;
-
-			foreach(cell, removedlist)
-			{
-				RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell);
-				Selectivity csel;
-
-				csel = clause_selectivity(root, (Node *) rinfo,
-										  0, jointype, sjinfo);
-				thisfksel = Min(thisfksel, csel);
-			}
-			fkselec *= thisfksel;
+			fkselec *= 1.0 / Max(ref_rel->tuples, 1.0);
 		}
 	}
 
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index d08b1e1..9e6e23d 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -5691,3 +5691,35 @@ where exists (select 1 from j3
 (13 rows)
 
 drop table j3;
+--
+-- Test Foreign key join estimation performs sanely for outer joins
+--
+begin work;
+create table fkest (a int, b int, c int unique, primary key(a,b));
+create table fkest1 (a int, b int, primary key(a,b));
+insert into fkest select x/10,x%10, x from generate_Series(1,400) x;
+insert into fkest1 select x/10,x%10 from generate_Series(1,400) x;
+alter table fkest1 add constraint fkest1_a_b_fkey foreign key (a,b) references fkest;
+analyze fkest;
+analyze fkest1;
+explain (costs off) select * from fkest f
+left join fkest1 f1 on f.a = f1.a and f.b = f1.b
+left join fkest1 f2 on f.a = f2.a and f.b = f2.b
+left join fkest1 f3 on f.a = f3.a and f.b = f3.b
+where f.c = 1;
+                            QUERY PLAN                            
+------------------------------------------------------------------
+ Nested Loop Left Join
+   ->  Nested Loop Left Join
+         ->  Nested Loop Left Join
+               ->  Seq Scan on fkest f
+                     Filter: (c = 1)
+               ->  Index Only Scan using fkest1_pkey on fkest1 f1
+                     Index Cond: ((a = f.a) AND (b = f.b))
+         ->  Index Only Scan using fkest1_pkey on fkest1 f2
+               Index Cond: ((a = f.a) AND (b = f.b))
+   ->  Index Only Scan using fkest1_pkey on fkest1 f3
+         Index Cond: ((a = f.a) AND (b = f.b))
+(11 rows)
+
+rollback;
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index c3994ea..5c79d74 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -1878,3 +1878,28 @@ where exists (select 1 from j3
       and t1.unique1 < 1;
 
 drop table j3;
+
+--
+-- Test Foreign key join estimation performs sanely for outer joins
+--
+
+begin work;
+
+create table fkest (a int, b int, c int unique, primary key(a,b));
+create table fkest1 (a int, b int, primary key(a,b));
+
+insert into fkest select x/10,x%10, x from generate_Series(1,400) x;
+insert into fkest1 select x/10,x%10 from generate_Series(1,400) x;
+
+alter table fkest1 add constraint fkest1_a_b_fkey foreign key (a,b) references fkest;
+
+analyze fkest;
+analyze fkest1;
+
+explain (costs off) select * from fkest f
+left join fkest1 f1 on f.a = f1.a and f.b = f1.b
+left join fkest1 f2 on f.a = f2.a and f.b = f2.b
+left join fkest1 f3 on f.a = f3.a and f.b = f3.b
+where f.c = 1;
+
+rollback;
-- 
1.9.5.msysgit.1

