Updated patchset is attached

On 4/14/25 17:25, Maksim Milyutin wrote:
On 4/11/25 17:00, Tom Lane wrote:
Maksim Milyutin<maksim.milyu...@tantorlabs.ru> writes:
I've noticed that COALESCE function doesn't converge to argument
expression if it is alone in argument list of COALESCE as part
simplification routine for expressions in planner. This might suppress
further useful transformations when non-strict ops are required from
some expression like converging OUTER JOIN to INNER one with WHERE qual
containing COALESCE over single column from inner side.
Seems like a reasonable idea --- it's probably a rare case, but the
check is cheap enough.  I'd add some comments though.


Thanks for your comments.


Please add this to the open commitfest so we don't lose track of it.


Done. In regression tests I've replaced all COALESCEs with single argument to ones with dummy second argument to preserve coalesce calls as AFAICS their usages are intentional for wrapping attributes to generate PHVs above.


Also I've noticed the issue in query (in join.sql test suite):

SELECT 1 FROM group_tbl t1
    LEFT JOIN (SELECT a c1, *COALESCE(a)* c2 FROM group_tbl t2) s ON TRUE
GROUP BY s.c1, s.c2

repeatable t2.a in GROUP BY clauses are not converged to single appearance:

                 QUERY PLAN
--------------------------------------------
 Group
   Group Key: t2.a, *t2.a*
   ->  Sort
         Sort Key: t2.a, *t2.a*
         ->  Nested Loop Left Join
               ->  Seq Scan on group_tbl t1
               ->  Seq Scan on group_tbl t2

IMO the cause is in PHV surrounding s.c2 that differentiates its internal expression with the same first grouping key.

--
Best regard,
Maksim Milyutin
From 294d7fdf105dac6a17a6ac94a832f4cdf02e6060 Mon Sep 17 00:00:00 2001
From: Maksim Milyutin <milyuti...@gmail.com>
Date: Mon, 14 Apr 2025 17:06:50 +0300
Subject: [PATCH v1 2/2] Adjust regression tests

---
 src/test/regress/expected/join.out      | 32 ++++++++---------
 src/test/regress/expected/subselect.out | 46 ++++++++++++-------------
 src/test/regress/sql/join.sql           | 10 +++---
 src/test/regress/sql/subselect.sql      |  8 ++---
 4 files changed, 48 insertions(+), 48 deletions(-)

diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 14da5708451..6c55eeda6be 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -5384,14 +5384,14 @@ select * from
   (select 1 as id) as xx
   left join
     (tenk1 as a1 full join (select 1 as id) as yy on (a1.unique1 = yy.id))
-  on (xx.id = coalesce(yy.id));
-              QUERY PLAN               
----------------------------------------
+  on (xx.id = coalesce(yy.id, 0));
+               QUERY PLAN               
+----------------------------------------
  Nested Loop Left Join
    ->  Result
    ->  Hash Full Join
          Hash Cond: (a1.unique1 = (1))
-         Filter: (1 = COALESCE((1)))
+         Filter: (1 = COALESCE((1), 0))
          ->  Seq Scan on tenk1 a1
          ->  Hash
                ->  Result
@@ -5401,7 +5401,7 @@ select * from
   (select 1 as id) as xx
   left join
     (tenk1 as a1 full join (select 1 as id) as yy on (a1.unique1 = yy.id))
-  on (xx.id = coalesce(yy.id));
+  on (xx.id = coalesce(yy.id, 0));
  id | unique1 | unique2 | two | four | ten | twenty | hundred | thousand | twothousand | fivethous | tenthous | odd | even | stringu1 | stringu2 | string4 | id 
 ----+---------+---------+-----+------+-----+--------+---------+----------+-------------+-----------+----------+-----+------+----------+----------+---------+----
   1 |       1 |    2838 |   1 |    1 |   1 |      1 |       1 |        1 |           1 |         1 |        1 |   2 |    3 | BAAAAA   | EFEAAA   | OOOOxx  |  1
@@ -8123,20 +8123,20 @@ select * from int4_tbl i left join
 
 explain (verbose, costs off)
 select * from int4_tbl i left join
-  lateral (select coalesce(i) from int2_tbl j where i.f1 = j.f1) k on true;
-             QUERY PLAN              
--------------------------------------
+  lateral (select coalesce(i, row(0)::int4_tbl) from int2_tbl j where i.f1 = j.f1) k on true;
+                      QUERY PLAN                      
+------------------------------------------------------
  Nested Loop Left Join
-   Output: i.f1, (COALESCE(i.*))
+   Output: i.f1, (COALESCE(i.*, '(0)'::int4_tbl))
    ->  Seq Scan on public.int4_tbl i
          Output: i.f1, i.*
    ->  Seq Scan on public.int2_tbl j
-         Output: j.f1, COALESCE(i.*)
+         Output: j.f1, COALESCE(i.*, '(0)'::int4_tbl)
          Filter: (i.f1 = j.f1)
 (7 rows)
 
 select * from int4_tbl i left join
-  lateral (select coalesce(i) from int2_tbl j where i.f1 = j.f1) k on true;
+  lateral (select coalesce(i, row(0)::int4_tbl) from int2_tbl j where i.f1 = j.f1) k on true;
      f1      | coalesce 
 -------------+----------
            0 | (0)
@@ -9305,14 +9305,14 @@ CREATE STATISTICS group_tbl_stat (ndistinct) ON a, b FROM group_tbl;
 ANALYZE group_tbl;
 EXPLAIN (COSTS OFF)
 SELECT 1 FROM group_tbl t1
-    LEFT JOIN (SELECT a c1, COALESCE(a) c2 FROM group_tbl t2) s ON TRUE
+    LEFT JOIN (SELECT a c1, COALESCE(a, 0) c2 FROM group_tbl t2) s ON TRUE
 GROUP BY s.c1, s.c2;
-                 QUERY PLAN                 
---------------------------------------------
+                 QUERY PLAN                  
+---------------------------------------------
  Group
-   Group Key: t2.a, (COALESCE(t2.a))
+   Group Key: t2.a, (COALESCE(t2.a, 0))
    ->  Sort
-         Sort Key: t2.a, (COALESCE(t2.a))
+         Sort Key: t2.a, (COALESCE(t2.a, 0))
          ->  Nested Loop Left Join
                ->  Seq Scan on group_tbl t1
                ->  Seq Scan on group_tbl t2
diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out
index 40d8056fcea..8af48e2ce13 100644
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -2127,30 +2127,30 @@ explain (verbose, costs off)
 select ss2.* from
   int8_tbl t1 left join
   (int8_tbl t2 left join
-   (select coalesce(q1) as x, * from int8_tbl t3) ss1 on t2.q1 = ss1.q2 inner join
+   (select coalesce(q1, 0) as x, * from int8_tbl t3) ss1 on t2.q1 = ss1.q2 inner join
    lateral (select ss1.x as y, * from int8_tbl t4) ss2 on t2.q2 = ss2.q1)
   on t1.q2 = ss2.q1
 order by 1, 2, 3;
-                           QUERY PLAN                           
-----------------------------------------------------------------
+                                 QUERY PLAN                                  
+-----------------------------------------------------------------------------
  Sort
-   Output: (COALESCE(t3.q1)), t4.q1, t4.q2
-   Sort Key: (COALESCE(t3.q1)), t4.q1, t4.q2
+   Output: (COALESCE(t3.q1, '0'::bigint)), t4.q1, t4.q2
+   Sort Key: (COALESCE(t3.q1, '0'::bigint)), t4.q1, t4.q2
    ->  Hash Right Join
-         Output: (COALESCE(t3.q1)), t4.q1, t4.q2
+         Output: (COALESCE(t3.q1, '0'::bigint)), t4.q1, t4.q2
          Hash Cond: (t4.q1 = t1.q2)
          ->  Hash Join
-               Output: (COALESCE(t3.q1)), t4.q1, t4.q2
+               Output: (COALESCE(t3.q1, '0'::bigint)), t4.q1, t4.q2
                Hash Cond: (t2.q2 = t4.q1)
                ->  Hash Left Join
-                     Output: t2.q2, (COALESCE(t3.q1))
+                     Output: t2.q2, (COALESCE(t3.q1, '0'::bigint))
                      Hash Cond: (t2.q1 = t3.q2)
                      ->  Seq Scan on public.int8_tbl t2
                            Output: t2.q1, t2.q2
                      ->  Hash
-                           Output: t3.q2, (COALESCE(t3.q1))
+                           Output: t3.q2, (COALESCE(t3.q1, '0'::bigint))
                            ->  Seq Scan on public.int8_tbl t3
-                                 Output: t3.q2, COALESCE(t3.q1)
+                                 Output: t3.q2, COALESCE(t3.q1, '0'::bigint)
                ->  Hash
                      Output: t4.q1, t4.q2
                      ->  Seq Scan on public.int8_tbl t4
@@ -2164,7 +2164,7 @@ order by 1, 2, 3;
 select ss2.* from
   int8_tbl t1 left join
   (int8_tbl t2 left join
-   (select coalesce(q1) as x, * from int8_tbl t3) ss1 on t2.q1 = ss1.q2 inner join
+   (select coalesce(q1, 0) as x, * from int8_tbl t3) ss1 on t2.q1 = ss1.q2 inner join
    lateral (select ss1.x as y, * from int8_tbl t4) ss2 on t2.q2 = ss2.q1)
   on t1.q2 = ss2.q1
 order by 1, 2, 3;
@@ -2201,32 +2201,32 @@ explain (verbose, costs off)
 select ss2.* from
   int8_tbl t1 left join
   (int8_tbl t2 left join
-   (select coalesce(q1) as x, * from int8_tbl t3) ss1 on t2.q1 = ss1.q2 left join
+   (select coalesce(q1, 0) as x, * from int8_tbl t3) ss1 on t2.q1 = ss1.q2 left join
    lateral (select ss1.x as y, * from int8_tbl t4) ss2 on t2.q2 = ss2.q1)
   on t1.q2 = ss2.q1
 order by 1, 2, 3;
-                           QUERY PLAN                           
-----------------------------------------------------------------
+                                 QUERY PLAN                                  
+-----------------------------------------------------------------------------
  Sort
-   Output: ((COALESCE(t3.q1))), t4.q1, t4.q2
-   Sort Key: ((COALESCE(t3.q1))), t4.q1, t4.q2
+   Output: ((COALESCE(t3.q1, '0'::bigint))), t4.q1, t4.q2
+   Sort Key: ((COALESCE(t3.q1, '0'::bigint))), t4.q1, t4.q2
    ->  Hash Right Join
-         Output: ((COALESCE(t3.q1))), t4.q1, t4.q2
+         Output: ((COALESCE(t3.q1, '0'::bigint))), t4.q1, t4.q2
          Hash Cond: (t4.q1 = t1.q2)
          ->  Nested Loop
-               Output: t4.q1, t4.q2, ((COALESCE(t3.q1)))
+               Output: t4.q1, t4.q2, ((COALESCE(t3.q1, '0'::bigint)))
                Join Filter: (t2.q2 = t4.q1)
                ->  Hash Left Join
-                     Output: t2.q2, (COALESCE(t3.q1))
+                     Output: t2.q2, (COALESCE(t3.q1, '0'::bigint))
                      Hash Cond: (t2.q1 = t3.q2)
                      ->  Seq Scan on public.int8_tbl t2
                            Output: t2.q1, t2.q2
                      ->  Hash
-                           Output: t3.q2, (COALESCE(t3.q1))
+                           Output: t3.q2, (COALESCE(t3.q1, '0'::bigint))
                            ->  Seq Scan on public.int8_tbl t3
-                                 Output: t3.q2, COALESCE(t3.q1)
+                                 Output: t3.q2, COALESCE(t3.q1, '0'::bigint)
                ->  Seq Scan on public.int8_tbl t4
-                     Output: t4.q1, t4.q2, (COALESCE(t3.q1))
+                     Output: t4.q1, t4.q2, (COALESCE(t3.q1, '0'::bigint))
          ->  Hash
                Output: t1.q2
                ->  Seq Scan on public.int8_tbl t1
@@ -2236,7 +2236,7 @@ order by 1, 2, 3;
 select ss2.* from
   int8_tbl t1 left join
   (int8_tbl t2 left join
-   (select coalesce(q1) as x, * from int8_tbl t3) ss1 on t2.q1 = ss1.q2 left join
+   (select coalesce(q1, 0) as x, * from int8_tbl t3) ss1 on t2.q1 = ss1.q2 left join
    lateral (select ss1.x as y, * from int8_tbl t4) ss2 on t2.q2 = ss2.q1)
   on t1.q2 = ss2.q1
 order by 1, 2, 3;
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index c29d13b9fed..89173e6629c 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -1902,13 +1902,13 @@ select * from
   (select 1 as id) as xx
   left join
     (tenk1 as a1 full join (select 1 as id) as yy on (a1.unique1 = yy.id))
-  on (xx.id = coalesce(yy.id));
+  on (xx.id = coalesce(yy.id, 0));
 
 select * from
   (select 1 as id) as xx
   left join
     (tenk1 as a1 full join (select 1 as id) as yy on (a1.unique1 = yy.id))
-  on (xx.id = coalesce(yy.id));
+  on (xx.id = coalesce(yy.id, 0));
 
 --
 -- test ability to push constants through outer join clauses
@@ -3067,9 +3067,9 @@ select * from int4_tbl i left join
   lateral (select * from int2_tbl j where i.f1 = j.f1) k on true;
 explain (verbose, costs off)
 select * from int4_tbl i left join
-  lateral (select coalesce(i) from int2_tbl j where i.f1 = j.f1) k on true;
+  lateral (select coalesce(i, row(0)::int4_tbl) from int2_tbl j where i.f1 = j.f1) k on true;
 select * from int4_tbl i left join
-  lateral (select coalesce(i) from int2_tbl j where i.f1 = j.f1) k on true;
+  lateral (select coalesce(i, row(0)::int4_tbl) from int2_tbl j where i.f1 = j.f1) k on true;
 explain (verbose, costs off)
 select * from int4_tbl a,
   lateral (
@@ -3535,7 +3535,7 @@ ANALYZE group_tbl;
 
 EXPLAIN (COSTS OFF)
 SELECT 1 FROM group_tbl t1
-    LEFT JOIN (SELECT a c1, COALESCE(a) c2 FROM group_tbl t2) s ON TRUE
+    LEFT JOIN (SELECT a c1, COALESCE(a, 0) c2 FROM group_tbl t2) s ON TRUE
 GROUP BY s.c1, s.c2;
 
 DROP TABLE group_tbl;
diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql
index fec38ef85a6..2cff59e6912 100644
--- a/src/test/regress/sql/subselect.sql
+++ b/src/test/regress/sql/subselect.sql
@@ -1041,7 +1041,7 @@ explain (verbose, costs off)
 select ss2.* from
   int8_tbl t1 left join
   (int8_tbl t2 left join
-   (select coalesce(q1) as x, * from int8_tbl t3) ss1 on t2.q1 = ss1.q2 inner join
+   (select coalesce(q1, 0) as x, * from int8_tbl t3) ss1 on t2.q1 = ss1.q2 inner join
    lateral (select ss1.x as y, * from int8_tbl t4) ss2 on t2.q2 = ss2.q1)
   on t1.q2 = ss2.q1
 order by 1, 2, 3;
@@ -1049,7 +1049,7 @@ order by 1, 2, 3;
 select ss2.* from
   int8_tbl t1 left join
   (int8_tbl t2 left join
-   (select coalesce(q1) as x, * from int8_tbl t3) ss1 on t2.q1 = ss1.q2 inner join
+   (select coalesce(q1, 0) as x, * from int8_tbl t3) ss1 on t2.q1 = ss1.q2 inner join
    lateral (select ss1.x as y, * from int8_tbl t4) ss2 on t2.q2 = ss2.q1)
   on t1.q2 = ss2.q1
 order by 1, 2, 3;
@@ -1059,7 +1059,7 @@ explain (verbose, costs off)
 select ss2.* from
   int8_tbl t1 left join
   (int8_tbl t2 left join
-   (select coalesce(q1) as x, * from int8_tbl t3) ss1 on t2.q1 = ss1.q2 left join
+   (select coalesce(q1, 0) as x, * from int8_tbl t3) ss1 on t2.q1 = ss1.q2 left join
    lateral (select ss1.x as y, * from int8_tbl t4) ss2 on t2.q2 = ss2.q1)
   on t1.q2 = ss2.q1
 order by 1, 2, 3;
@@ -1067,7 +1067,7 @@ order by 1, 2, 3;
 select ss2.* from
   int8_tbl t1 left join
   (int8_tbl t2 left join
-   (select coalesce(q1) as x, * from int8_tbl t3) ss1 on t2.q1 = ss1.q2 left join
+   (select coalesce(q1, 0) as x, * from int8_tbl t3) ss1 on t2.q1 = ss1.q2 left join
    lateral (select ss1.x as y, * from int8_tbl t4) ss2 on t2.q2 = ss2.q1)
   on t1.q2 = ss2.q1
 order by 1, 2, 3;
-- 
2.43.0

From 8c1f606de02ff7aa8799046abc067949b76d4046 Mon Sep 17 00:00:00 2001
From: Maksim Milyutin <milyuti...@gmail.com>
Date: Fri, 11 Apr 2025 15:43:42 +0300
Subject: [PATCH v1 1/2] Simplify COALESCE with single argument

---
 src/backend/optimizer/util/clauses.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c
index 26a3e050086..c15a3a41cb6 100644
--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@@ -3333,6 +3333,13 @@ eval_const_expressions_mutator(Node *node,
 												  -1,
 												  coalesceexpr->coalescecollid);
 
+				/*
+				 * COALESCE with single valued argument looks like identity
+				 * function so that it's converged to its single argument
+				 */
+				if (list_length(newargs) == 1)
+					return (Node *) linitial(newargs);
+
 				newcoalesce = makeNode(CoalesceExpr);
 				newcoalesce->coalescetype = coalesceexpr->coalescetype;
 				newcoalesce->coalescecollid = coalesceexpr->coalescecollid;
-- 
2.43.0

Reply via email to