From 3262a26abb4b6e43b6da0fc9537ff32541d7feee Mon Sep 17 00:00:00 2001
From: Richard Guo <guofenglinux@gmail.com>
Date: Mon, 1 Dec 2025 10:38:44 +0900
Subject: [PATCH v4 1/3] Simplify COALESCE expressions using non-nullable
 arguments

The COALESCE function returns the first of its arguments that is not
null.  When an argument is proven non-null, if it is the first
non-null-constant argument, the entire COALESCE expression can be
replaced by that argument.  If it is a subsequent argument, all
following arguments can be dropped, since they will never be reached.

Currently, we perform this simplification only for Const arguments.
This patch extends the simplification to support any expression that
can be proven non-nullable.

This can help avoid the overhead of evaluating unreachable arguments.
It can also lead to better plans when the first argument is proven
non-nullable and replaces the expression, as the planner no longer has
to treat the expression as non-strict, and can also leverage index
scans on the resulting expression.

There is an ensuing plan change in generated_virtual.out, and we have
to modify the test to ensure that it continues to test what it is
intended to.
---
 src/backend/optimizer/util/clauses.c          | 18 +++++--
 .../regress/expected/generated_virtual.out    | 48 ++++++++++---------
 src/test/regress/expected/predicate.out       | 33 +++++++++++++
 src/test/regress/sql/generated_virtual.sql    | 11 +++--
 src/test/regress/sql/predicate.sql            | 20 ++++++++
 5 files changed, 97 insertions(+), 33 deletions(-)

diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c
index bda4c4eb292..2583cd66509 100644
--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@@ -3325,10 +3325,10 @@ eval_const_expressions_mutator(Node *node,
 													   context);
 
 					/*
-					 * We can remove null constants from the list. For a
-					 * non-null constant, if it has not been preceded by any
-					 * other non-null-constant expressions then it is the
-					 * result. Otherwise, it's the next argument, but we can
+					 * We can remove null constants from the list.  For a
+					 * nonnullable expression, if it has not been preceded by
+					 * any non-null-constant expressions then it is the
+					 * result.  Otherwise, it's the next argument, but we can
 					 * drop following arguments since they will never be
 					 * reached.
 					 */
@@ -3341,6 +3341,14 @@ eval_const_expressions_mutator(Node *node,
 						newargs = lappend(newargs, e);
 						break;
 					}
+					if (expr_is_nonnullable(context->root, (Expr *) e, false))
+					{
+						if (newargs == NIL)
+							return e;	/* first expr */
+						newargs = lappend(newargs, e);
+						break;
+					}
+
 					newargs = lappend(newargs, e);
 				}
 
@@ -4328,7 +4336,7 @@ var_is_nonnullable(PlannerInfo *root, Var *var, bool use_rel_info)
 bool
 expr_is_nonnullable(PlannerInfo *root, Expr *expr, bool use_rel_info)
 {
-	if (IsA(expr, Var))
+	if (IsA(expr, Var) && root)
 		return var_is_nonnullable(root, (Var *) expr, use_rel_info);
 	if (IsA(expr, Const))
 		return !castNode(Const, expr)->constisnull;
diff --git a/src/test/regress/expected/generated_virtual.out b/src/test/regress/expected/generated_virtual.out
index dde325e46c6..249e68be654 100644
--- a/src/test/regress/expected/generated_virtual.out
+++ b/src/test/regress/expected/generated_virtual.out
@@ -1509,10 +1509,11 @@ create table gtest32 (
   a int primary key,
   b int generated always as (a * 2),
   c int generated always as (10 + 10),
-  d int generated always as (coalesce(a, 100)),
-  e int
+  d int generated always as (coalesce(f, 100)),
+  e int,
+  f int
 );
-insert into gtest32 values (1), (2);
+insert into gtest32 (a, f) values (1, 1), (2, 2);
 analyze gtest32;
 -- Ensure that nullingrel bits are propagated into the generation expressions
 explain (costs off)
@@ -1591,46 +1592,47 @@ where coalesce(t2.b, 1) = 2 or t1.a is null;
 -- Ensure that the generation expressions are wrapped into PHVs if needed
 explain (verbose, costs off)
 select t2.* from gtest32 t1 left join gtest32 t2 on false;
-                          QUERY PLAN                           
----------------------------------------------------------------
+                             QUERY PLAN                              
+---------------------------------------------------------------------
  Nested Loop Left Join
-   Output: t2.a, (t2.a * 2), (20), (COALESCE(t2.a, 100)), t2.e
+   Output: t2.a, (t2.a * 2), (20), (COALESCE(t2.f, 100)), t2.e, t2.f
    Join Filter: false
    ->  Seq Scan on generated_virtual_tests.gtest32 t1
-         Output: t1.a, t1.b, t1.c, t1.d, t1.e
+         Output: t1.a, t1.b, t1.c, t1.d, t1.e, t1.f
    ->  Result
-         Output: t2.a, t2.e, 20, COALESCE(t2.a, 100)
+         Output: t2.a, t2.e, t2.f, 20, COALESCE(t2.f, 100)
          Replaces: Scan on t2
          One-Time Filter: false
 (9 rows)
 
 select t2.* from gtest32 t1 left join gtest32 t2 on false;
- a | b | c | d | e 
----+---+---+---+---
-   |   |   |   |  
-   |   |   |   |  
+ a | b | c | d | e | f 
+---+---+---+---+---+---
+   |   |   |   |   |  
+   |   |   |   |   |  
 (2 rows)
 
 explain (verbose, costs off)
-select * from gtest32 t group by grouping sets (a, b, c, d, e) having c = 20;
-                     QUERY PLAN                      
------------------------------------------------------
+select * from gtest32 t group by grouping sets (a, b, c, d, e, f) having c = 20;
+                       QUERY PLAN                       
+--------------------------------------------------------
  HashAggregate
-   Output: a, ((a * 2)), (20), (COALESCE(a, 100)), e
+   Output: a, ((a * 2)), (20), (COALESCE(f, 100)), e, f
    Hash Key: t.a
    Hash Key: (t.a * 2)
    Hash Key: 20
-   Hash Key: COALESCE(t.a, 100)
+   Hash Key: COALESCE(t.f, 100)
    Hash Key: t.e
+   Hash Key: t.f
    Filter: ((20) = 20)
    ->  Seq Scan on generated_virtual_tests.gtest32 t
-         Output: a, (a * 2), 20, COALESCE(a, 100), e
-(10 rows)
+         Output: a, (a * 2), 20, COALESCE(f, 100), e, f
+(11 rows)
 
-select * from gtest32 t group by grouping sets (a, b, c, d, e) having c = 20;
- a | b | c  | d | e 
----+---+----+---+---
-   |   | 20 |   |  
+select * from gtest32 t group by grouping sets (a, b, c, d, e, f) having c = 20;
+ a | b | c  | d | e | f 
+---+---+----+---+---+---
+   |   | 20 |   |   |  
 (1 row)
 
 -- Ensure that the virtual generated columns in ALTER COLUMN TYPE USING expression are expanded
diff --git a/src/test/regress/expected/predicate.out b/src/test/regress/expected/predicate.out
index 66fb0854b88..9f9f795e892 100644
--- a/src/test/regress/expected/predicate.out
+++ b/src/test/regress/expected/predicate.out
@@ -443,3 +443,36 @@ SELECT * FROM pred_tab2, pred_tab1 WHERE pred_tab1.a IS NULL OR pred_tab1.b < 2;
 RESET constraint_exclusion;
 DROP TABLE pred_tab1;
 DROP TABLE pred_tab2;
+--
+-- Test that COALESCE expressions in predicates are simplified using
+-- non-nullable arguments.
+--
+CREATE TABLE pred_tab (a int NOT NULL, b int);
+-- Ensure that constant NULL arguments are dropped
+EXPLAIN (COSTS OFF)
+SELECT * FROM pred_tab WHERE COALESCE(NULL, b, NULL, a) > 1;
+           QUERY PLAN           
+--------------------------------
+ Seq Scan on pred_tab
+   Filter: (COALESCE(b, a) > 1)
+(2 rows)
+
+-- Ensure that argument "b*a" is dropped
+EXPLAIN (COSTS OFF)
+SELECT * FROM pred_tab WHERE COALESCE(b, a, b*a) > 1;
+           QUERY PLAN           
+--------------------------------
+ Seq Scan on pred_tab
+   Filter: (COALESCE(b, a) > 1)
+(2 rows)
+
+-- Ensure that the entire COALESCE expression is replaced by "a"
+EXPLAIN (COSTS OFF)
+SELECT * FROM pred_tab WHERE COALESCE(a, b) > 1;
+      QUERY PLAN      
+----------------------
+ Seq Scan on pred_tab
+   Filter: (a > 1)
+(2 rows)
+
+DROP TABLE pred_tab;
diff --git a/src/test/regress/sql/generated_virtual.sql b/src/test/regress/sql/generated_virtual.sql
index 2911439776c..81152b39a79 100644
--- a/src/test/regress/sql/generated_virtual.sql
+++ b/src/test/regress/sql/generated_virtual.sql
@@ -817,11 +817,12 @@ create table gtest32 (
   a int primary key,
   b int generated always as (a * 2),
   c int generated always as (10 + 10),
-  d int generated always as (coalesce(a, 100)),
-  e int
+  d int generated always as (coalesce(f, 100)),
+  e int,
+  f int
 );
 
-insert into gtest32 values (1), (2);
+insert into gtest32 (a, f) values (1, 1), (2, 2);
 analyze gtest32;
 
 -- Ensure that nullingrel bits are propagated into the generation expressions
@@ -859,8 +860,8 @@ select t2.* from gtest32 t1 left join gtest32 t2 on false;
 select t2.* from gtest32 t1 left join gtest32 t2 on false;
 
 explain (verbose, costs off)
-select * from gtest32 t group by grouping sets (a, b, c, d, e) having c = 20;
-select * from gtest32 t group by grouping sets (a, b, c, d, e) having c = 20;
+select * from gtest32 t group by grouping sets (a, b, c, d, e, f) having c = 20;
+select * from gtest32 t group by grouping sets (a, b, c, d, e, f) having c = 20;
 
 -- Ensure that the virtual generated columns in ALTER COLUMN TYPE USING expression are expanded
 alter table gtest32 alter column e type bigint using b;
diff --git a/src/test/regress/sql/predicate.sql b/src/test/regress/sql/predicate.sql
index 32302d60b6d..0c7e4b974ff 100644
--- a/src/test/regress/sql/predicate.sql
+++ b/src/test/regress/sql/predicate.sql
@@ -221,3 +221,23 @@ SELECT * FROM pred_tab2, pred_tab1 WHERE pred_tab1.a IS NULL OR pred_tab1.b < 2;
 RESET constraint_exclusion;
 DROP TABLE pred_tab1;
 DROP TABLE pred_tab2;
+
+--
+-- Test that COALESCE expressions in predicates are simplified using
+-- non-nullable arguments.
+--
+CREATE TABLE pred_tab (a int NOT NULL, b int);
+
+-- Ensure that constant NULL arguments are dropped
+EXPLAIN (COSTS OFF)
+SELECT * FROM pred_tab WHERE COALESCE(NULL, b, NULL, a) > 1;
+
+-- Ensure that argument "b*a" is dropped
+EXPLAIN (COSTS OFF)
+SELECT * FROM pred_tab WHERE COALESCE(b, a, b*a) > 1;
+
+-- Ensure that the entire COALESCE expression is replaced by "a"
+EXPLAIN (COSTS OFF)
+SELECT * FROM pred_tab WHERE COALESCE(a, b) > 1;
+
+DROP TABLE pred_tab;
-- 
2.39.5 (Apple Git-154)

