Merge clauses

Ilia Evdokimov Thu, 10 Jul 2025 03:10:10 -0700

Hi hackers,

The planner currently calls approx_tuple_count() to estimatehashjointuples and mergejointuples. That makes sense whenjoinrestrictinfo contains additional clauses beyond the hash/mergeequality list. But if all join restriction clauses are exactly thosehash/merge clauses, the estimate already computed inpath->jpath.path.rows is usually more accurate (and free).

This patch reuses path->jpath.path.rows in that case and skipsapprox_tuple_count().


Regression results
==================

                     | actual | approx | estimate
---------------------+--------+--------+-------
join.sql q1          |      5 |      1 |     5
join.sql q2          | 10 000 |      1 | 10 000
join.sql q3          |      5 |      5 |     5
join.sql q4          |      5 |      5 |     5
join.sql q5          |      5 |      1 |     5
partition_join q1    |    200 |      1 |   200
partition_join q2    |     42 |     84 |    84
partition_join q3    |      8 |      1 |     8
postgres_fdw         |   2001 |   1001 |  2001
select_parallel q1   |      0 |  5 000 | 5 000
updatable_views q1   |      2 |      2 |   423

Two cases get worse: select_parallel.sql and updatable_views.sql.

Looking forward to your feedback!

--
Best regards,
Ilia Evdokimov,
Tantor Labs LLC.

From 79392e58873f60fc9b62fc7d8816ad973e775026 Mon Sep 17 00:00:00 2001
From: Ilia Evdokimov <ilya.evdoki...@tantorlabs.ru>
Date: Thu, 10 Jul 2025 13:04:04 +0300
Subject: [PATCH v1] Improve row count estimates for hash/merge joins that have
 no extra filters

If joinrestrictinfo holds nothing but the hash/merge equality keys,
path->jpath.path.rows already gives the right inner-join row count.
Skip the extra call to approx_tuple_count() and use that value instead.
---
 .../postgres_fdw/expected/postgres_fdw.out    | 26 ++++++-----
 src/backend/optimizer/path/costsize.c         | 17 +++++--
 src/test/isolation/expected/merge-update.out  |  4 +-
 src/test/regress/expected/join.out            | 46 +++++++++----------
 src/test/regress/expected/partition_join.out  | 37 +++++++--------
 src/test/regress/expected/select_parallel.out | 34 +++++++-------
 src/test/regress/expected/updatable_views.out | 16 +++----
 7 files changed, 94 insertions(+), 86 deletions(-)

diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 2185b42bb4f..3f06fde5547 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -8698,26 +8698,28 @@ select foo.f1, loct1.f1 from foo join loct1 on (foo.f1 = loct1.f1) order by foo.
 -- list but no output change as compared to the previous query
 explain (verbose, costs off)
 	select foo.f1, loct1.f1 from foo left join loct1 on (foo.f1 = loct1.f1) order by foo.f2 offset 10 limit 10;
-                                            QUERY PLAN                                            
---------------------------------------------------------------------------------------------------
+                                               QUERY PLAN                                               
+--------------------------------------------------------------------------------------------------------
  Limit
    Output: foo.f1, loct1.f1, foo.f2
    ->  Sort
          Output: foo.f1, loct1.f1, foo.f2
          Sort Key: foo.f2
-         ->  Merge Left Join
+         ->  Merge Right Join
                Output: foo.f1, loct1.f1, foo.f2
-               Merge Cond: (foo.f1 = loct1.f1)
-               ->  Merge Append
-                     Sort Key: foo.f1
-                     ->  Index Scan using i_foo_f1 on public.foo foo_1
-                           Output: foo_1.f1, foo_1.f2
-                     ->  Foreign Scan on public.foo2 foo_2
-                           Output: foo_2.f1, foo_2.f2
-                           Remote SQL: SELECT f1, f2 FROM public.loct1 ORDER BY f1 ASC NULLS LAST
+               Merge Cond: (loct1.f1 = foo.f1)
                ->  Index Only Scan using i_loct1_f1 on public.loct1
                      Output: loct1.f1
-(17 rows)
+               ->  Materialize
+                     Output: foo.f1, foo.f2
+                     ->  Merge Append
+                           Sort Key: foo.f1
+                           ->  Index Scan using i_foo_f1 on public.foo foo_1
+                                 Output: foo_1.f1, foo_1.f2
+                           ->  Foreign Scan on public.foo2 foo_2
+                                 Output: foo_2.f1, foo_2.f2
+                                 Remote SQL: SELECT f1, f2 FROM public.loct1 ORDER BY f1 ASC NULLS LAST
+(19 rows)
 
 select foo.f1, loct1.f1 from foo left join loct1 on (foo.f1 = loct1.f1) order by foo.f2 offset 10 limit 10;
  f1 | f1 
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 1f04a2c182c..f313279ba9e 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -3934,8 +3934,14 @@ final_cost_mergejoin(PlannerInfo *root, MergePath *path,
 	/*
 	 * Get approx # tuples passing the mergequals.  We use approx_tuple_count
 	 * here because we need an estimate done with JOIN_INNER semantics.
+	 * However, if joinrestrictinfo consists only of those base hash/merge
+	 * clauses, we have already computed an equally (or more) accurate figure
+	 * in path->jpath.path.rows
 	 */
-	mergejointuples = approx_tuple_count(root, &path->jpath, mergeclauses);
+	if (list_length(path->jpath.joinrestrictinfo) > list_length(mergeclauses))
+		mergejointuples = approx_tuple_count(root, &path->jpath, mergeclauses);
+	else
+		mergejointuples = path->jpath.path.rows;
 
 	/*
 	 * When there are equal merge keys in the outer relation, the mergejoin
@@ -4525,9 +4531,14 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path,
 		/*
 		 * Get approx # tuples passing the hashquals.  We use
 		 * approx_tuple_count here because we need an estimate done with
-		 * JOIN_INNER semantics.
+		 * JOIN_INNER semantics. However, if joinrestrictinfo consists
+		 * only of those base hash/merge clauses, we have already computed
+		 * an equally (or more) accurate figure in path->jpath.path.rows
 		 */
-		hashjointuples = approx_tuple_count(root, &path->jpath, hashclauses);
+		if (list_length(path->jpath.joinrestrictinfo) > list_length(hashclauses))
+			hashjointuples = approx_tuple_count(root, &path->jpath, hashclauses);
+		else
+			hashjointuples = path->jpath.path.rows;
 	}
 
 	/*
diff --git a/src/test/isolation/expected/merge-update.out b/src/test/isolation/expected/merge-update.out
index 677263d1ec1..3f8a591e815 100644
--- a/src/test/isolation/expected/merge-update.out
+++ b/src/test/isolation/expected/merge-update.out
@@ -44,15 +44,15 @@ step merge2a:
 
 merge_action|old                           |new                                                         |key|val                                                   
 ------------+------------------------------+------------------------------------------------------------+---+------------------------------------------------------
-UPDATE      |(2,"setup1 updated by merge1")|(3,"setup1 updated by merge1 source not matched by merge2a")|  3|setup1 updated by merge1 source not matched by merge2a
 INSERT      |                              |(1,merge2a)                                                 |  1|merge2a                                               
+UPDATE      |(2,"setup1 updated by merge1")|(3,"setup1 updated by merge1 source not matched by merge2a")|  3|setup1 updated by merge1 source not matched by merge2a
 (2 rows)
 
 step select2: SELECT * FROM target;
 key|val                                                   
 ---+------------------------------------------------------
-  3|setup1 updated by merge1 source not matched by merge2a
   1|merge2a                                               
+  3|setup1 updated by merge1 source not matched by merge2a
 (2 rows)
 
 step c2: COMMIT;
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 46ddfa844c5..96a9486ccf5 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -2288,10 +2288,10 @@ order by 1, 2;
 -------------------------------------------
  Sort
    Sort Key: i1.q1, i1.q2
-   ->  Hash Left Join
-         Hash Cond: (i1.q2 = i2.q2)
+   ->  Nested Loop Left Join
+         Join Filter: (i1.q2 = i2.q2)
          ->  Seq Scan on int8_tbl i1
-         ->  Hash
+         ->  Materialize
                ->  Seq Scan on int8_tbl i2
                      Filter: (q1 = 123)
 (8 rows)
@@ -4457,11 +4457,11 @@ select unique1, x from tenk1 full join f_immutable_int4(1) x on unique1 = x;
                      QUERY PLAN                     
 ----------------------------------------------------
  Merge Full Join
-   Merge Cond: (tenk1.unique1 = (1))
-   ->  Index Only Scan using tenk1_unique1 on tenk1
+   Merge Cond: ((1) = tenk1.unique1)
    ->  Sort
          Sort Key: (1)
          ->  Result
+   ->  Index Only Scan using tenk1_unique1 on tenk1
 (6 rows)
 
 -- check that pullup of a const function allows further const-folding
@@ -6036,15 +6036,14 @@ select * from int8_tbl t1 left join
   (int8_tbl t2 inner join int8_tbl t3 on false
    left join int8_tbl t4 on t2.q2 = t4.q2)
 on t1.q1 = t2.q1;
-              QUERY PLAN              
---------------------------------------
- Hash Left Join
-   Hash Cond: (t1.q1 = q1)
+           QUERY PLAN           
+--------------------------------
+ Nested Loop Left Join
+   Join Filter: (t1.q1 = q1)
    ->  Seq Scan on int8_tbl t1
-   ->  Hash
-         ->  Result
-               One-Time Filter: false
-(6 rows)
+   ->  Result
+         One-Time Filter: false
+(5 rows)
 
 -- deduce constant-false from an EquivalenceClass
 explain (costs off)
@@ -6052,15 +6051,14 @@ select * from int8_tbl t1 left join
   (int8_tbl t2 inner join int8_tbl t3 on (t2.q1-t3.q2) = 0 and (t2.q1-t3.q2) = 1
    left join int8_tbl t4 on t2.q2 = t4.q2)
 on t1.q1 = t2.q1;
-              QUERY PLAN              
---------------------------------------
- Hash Left Join
-   Hash Cond: (t1.q1 = q1)
+           QUERY PLAN           
+--------------------------------
+ Nested Loop Left Join
+   Join Filter: (t1.q1 = q1)
    ->  Seq Scan on int8_tbl t1
-   ->  Hash
-         ->  Result
-               One-Time Filter: false
-(6 rows)
+   ->  Result
+         One-Time Filter: false
+(5 rows)
 
 -- pseudoconstant based on an outer-level Param
 explain (costs off)
@@ -8445,12 +8443,12 @@ select * from int4_tbl a,
    Output: a.f1, b.f1, c.q1, c.q2
    ->  Seq Scan on public.int4_tbl a
          Output: a.f1
-   ->  Hash Left Join
+   ->  Nested Loop Left Join
          Output: b.f1, c.q1, c.q2
-         Hash Cond: (b.f1 = c.q1)
+         Join Filter: (b.f1 = c.q1)
          ->  Seq Scan on public.int4_tbl b
                Output: b.f1
-         ->  Hash
+         ->  Materialize
                Output: c.q1, c.q2
                ->  Seq Scan on public.int8_tbl c
                      Output: c.q1, c.q2
diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out
index d5368186caa..c5d7c4eb23e 100644
--- a/src/test/regress/expected/partition_join.out
+++ b/src/test/regress/expected/partition_join.out
@@ -1623,8 +1623,8 @@ EXPLAIN (COSTS OFF)
 SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1 WHERE a = 1 AND a = 2) t1 RIGHT JOIN prt2 t2 ON t1.a = t2.b, prt1 t3 WHERE t2.b = t3.a;
                     QUERY PLAN                    
 --------------------------------------------------
- Hash Left Join
-   Hash Cond: (t2.b = a)
+ Nested Loop Left Join
+   Join Filter: (a = t2.b)
    ->  Append
          ->  Hash Join
                Hash Cond: (t3_1.a = t2_1.b)
@@ -1641,10 +1641,9 @@ SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1 WHERE a = 1 AND a = 2) t1
                ->  Seq Scan on prt1_p3 t3_3
                ->  Hash
                      ->  Seq Scan on prt2_p3 t2_3
-   ->  Hash
-         ->  Result
-               One-Time Filter: false
-(21 rows)
+   ->  Result
+         One-Time Filter: false
+(20 rows)
 
 EXPLAIN (COSTS OFF)
 SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1 WHERE a = 1 AND a = 2) t1 FULL JOIN prt2 t2 ON t1.a = t2.b WHERE t2.a = 0 ORDER BY t1.a, t2.b;
@@ -1652,8 +1651,8 @@ SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1 WHERE a = 1 AND a = 2) t1
 --------------------------------------------
  Sort
    Sort Key: a, t2.b
-   ->  Hash Left Join
-         Hash Cond: (t2.b = a)
+   ->  Nested Loop Left Join
+         Join Filter: (a = t2.b)
          ->  Append
                ->  Seq Scan on prt2_p1 t2_1
                      Filter: (a = 0)
@@ -1661,10 +1660,9 @@ SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1 WHERE a = 1 AND a = 2) t1
                      Filter: (a = 0)
                ->  Seq Scan on prt2_p3 t2_3
                      Filter: (a = 0)
-         ->  Hash
-               ->  Result
-                     One-Time Filter: false
-(14 rows)
+         ->  Result
+               One-Time Filter: false
+(13 rows)
 
 --
 -- tests for hash partitioned tables.
@@ -2238,20 +2236,19 @@ SELECT COUNT(*) FROM prt1_l t1 LEFT JOIN LATERAL
 -- join with one side empty
 EXPLAIN (COSTS OFF)
 SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1_l WHERE a = 1 AND a = 2) t1 RIGHT JOIN prt2_l t2 ON t1.a = t2.b AND t1.b = t2.a AND t1.c = t2.c;
-                               QUERY PLAN                                
--------------------------------------------------------------------------
- Hash Left Join
-   Hash Cond: ((t2.b = a) AND (t2.a = b) AND ((t2.c)::text = (c)::text))
+                                QUERY PLAN                                 
+---------------------------------------------------------------------------
+ Nested Loop Left Join
+   Join Filter: ((a = t2.b) AND (b = t2.a) AND ((c)::text = (t2.c)::text))
    ->  Append
          ->  Seq Scan on prt2_l_p1 t2_1
          ->  Seq Scan on prt2_l_p2_p1 t2_2
          ->  Seq Scan on prt2_l_p2_p2 t2_3
          ->  Seq Scan on prt2_l_p3_p1 t2_4
          ->  Seq Scan on prt2_l_p3_p2 t2_5
-   ->  Hash
-         ->  Result
-               One-Time Filter: false
-(11 rows)
+   ->  Result
+         One-Time Filter: false
+(10 rows)
 
 -- Test case to verify proper handling of subqueries in a partitioned delete.
 -- The weird-looking lateral join is just there to force creation of a
diff --git a/src/test/regress/expected/select_parallel.out b/src/test/regress/expected/select_parallel.out
index 0185ef661b1..418174991eb 100644
--- a/src/test/regress/expected/select_parallel.out
+++ b/src/test/regress/expected/select_parallel.out
@@ -1125,27 +1125,27 @@ reset role;
 explain (costs off, verbose)
   select count(*) from tenk1 a where (unique1, two) in
     (select unique1, row_number() over() from tenk1 b);
-                                       QUERY PLAN                                       
-----------------------------------------------------------------------------------------
+                                          QUERY PLAN                                          
+----------------------------------------------------------------------------------------------
  Aggregate
    Output: count(*)
-   ->  Hash Right Semi Join
-         Hash Cond: ((b.unique1 = a.unique1) AND ((row_number() OVER w1) = a.two))
-         ->  WindowAgg
-               Output: b.unique1, row_number() OVER w1
-               Window: w1 AS (ROWS UNBOUNDED PRECEDING)
-               ->  Gather
-                     Output: b.unique1
-                     Workers Planned: 4
-                     ->  Parallel Index Only Scan using tenk1_unique1 on public.tenk1 b
-                           Output: b.unique1
-         ->  Hash
+   ->  Hash Semi Join
+         Hash Cond: ((a.unique1 = b.unique1) AND (a.two = (row_number() OVER w1)))
+         ->  Gather
                Output: a.unique1, a.two
-               ->  Gather
+               Workers Planned: 4
+               ->  Parallel Seq Scan on public.tenk1 a
                      Output: a.unique1, a.two
-                     Workers Planned: 4
-                     ->  Parallel Seq Scan on public.tenk1 a
-                           Output: a.unique1, a.two
+         ->  Hash
+               Output: b.unique1, (row_number() OVER w1)
+               ->  WindowAgg
+                     Output: b.unique1, row_number() OVER w1
+                     Window: w1 AS (ROWS UNBOUNDED PRECEDING)
+                     ->  Gather
+                           Output: b.unique1
+                           Workers Planned: 4
+                           ->  Parallel Index Only Scan using tenk1_unique1 on public.tenk1 b
+                                 Output: b.unique1
 (19 rows)
 
 -- LIMIT/OFFSET within sub-selects can't be pushed to workers.
diff --git a/src/test/regress/expected/updatable_views.out b/src/test/regress/expected/updatable_views.out
index 095df0a670c..aea5b52a70c 100644
--- a/src/test/regress/expected/updatable_views.out
+++ b/src/test/regress/expected/updatable_views.out
@@ -520,9 +520,9 @@ MERGE INTO rw_view1 t
  merge_action | a | b  |                  old                   |                  new                   | a |      b      |   c   |       d        | a |      b      |   c   |       d        | a |      b      |   c   |       d        
 --------------+---+----+----------------------------------------+----------------------------------------+---+-------------+-------+----------------+---+-------------+-------+----------------+---+-------------+-------+----------------
  UPDATE       | 1 | R1 | (1,"ROW 1",Const,"b: ROW 1")           | (1,R1,Const,"b: R1")                   | 1 | ROW 1       | Const | b: ROW 1       | 1 | R1          | Const | b: R1          | 1 | R1          | Const | b: R1
- DELETE       |   |    | (5,Unspecified,Const,"b: Unspecified") |                                        | 5 | Unspecified | Const | b: Unspecified |   |             |       |                | 5 | Unspecified | Const | b: Unspecified
  DELETE       | 2 | R2 | (2,Unspecified,Const,"b: Unspecified") |                                        | 2 | Unspecified | Const | b: Unspecified |   |             |       |                | 2 | Unspecified | Const | b: Unspecified
  INSERT       | 3 | R3 |                                        | (3,Unspecified,Const,"b: Unspecified") |   |             |       |                | 3 | Unspecified | Const | b: Unspecified | 3 | Unspecified | Const | b: Unspecified
+ DELETE       |   |    | (5,Unspecified,Const,"b: Unspecified") |                                        | 5 | Unspecified | Const | b: Unspecified |   |             |       |                | 5 | Unspecified | Const | b: Unspecified
 (4 rows)
 
 SELECT * FROM base_tbl ORDER BY a;
@@ -585,14 +585,14 @@ MERGE INTO rw_view1 t
                             QUERY PLAN                             
 -------------------------------------------------------------------
  Merge on base_tbl
-   ->  Hash Left Join
-         Hash Cond: (base_tbl.a = generate_series.generate_series)
-         ->  Bitmap Heap Scan on base_tbl
-               Recheck Cond: (a > 0)
-               ->  Bitmap Index Scan on base_tbl_pkey
-                     Index Cond: (a > 0)
+   ->  Hash Right Join
+         Hash Cond: (generate_series.generate_series = base_tbl.a)
+         ->  Function Scan on generate_series
          ->  Hash
-               ->  Function Scan on generate_series
+               ->  Bitmap Heap Scan on base_tbl
+                     Recheck Cond: (a > 0)
+                     ->  Bitmap Index Scan on base_tbl_pkey
+                           Index Cond: (a > 0)
 (9 rows)
 
 EXPLAIN (costs off)
-- 
2.34.1

Improve Hash/Merge Join estimate accuracy when all predicates are Hash/Merge clauses

Reply via email to