On Fri, 29 Nov 2024 at 13:10, Dean Rasheed <dean.a.rash...@gmail.com> wrote:
>
> There are a couple more things that I think need tidying up. I'll post an 
> update when I get back to my computer.
>

Here's an update with some cosmetic tidying up, plus a couple of
not-so-cosmetic changes:

The new #include wasn't in the right place alphabetically (the same is
true for the recent timestamp equivalent function).

It's not necessary to call init_var() for a variable that you're going
to initialise with init_var_from_num(), and it's then not necessary to
call free_var() for that variable.

It's not necessary to have separate NumericVars for the difference and
quotient -- the same variable can be reused.

Doing both those things means that there's only one variable to free
after the computation, and it can be kept local to if-step-not-zero
code block, so there's no need for the "goto cleanup" stuff.

It seems worth avoiding div_var() in the 2-argument case, when step = 1.

Regards,
Dean
From b2de4cdc4f0c2752a734ce9dd2a55d92644ba92e Mon Sep 17 00:00:00 2001
From: Dean Rasheed <dean.a.rash...@gmail.com>
Date: Fri, 29 Nov 2024 19:49:28 +0000
Subject: [PATCH v4] Add a planner support function for numeric
 generate_series().

This allows the planner to estimate the number of rows returned by
generate_series(numeric, numeric[, numeric]), when the input values
can be estimated at plan time.

Song Jinzhou, reviewed by Dean Rasheed and David Rowley.

Discussion: https://postgr.es/m/tencent_F43E7F4DD50EF5986D1051DE8DE547910206%40qq.com
Discussion: https://postgr.es/m/tencent_1F6D5B9A1545E02FD7D0EE508DFD056DE50A%40qq.com
---
 src/backend/utils/adt/numeric.c              | 121 +++++++++++++++++++
 src/include/catalog/pg_proc.dat              |   9 +-
 src/test/regress/expected/misc_functions.out |  65 ++++++++++
 src/test/regress/sql/misc_functions.sql      |  38 ++++++
 4 files changed, 231 insertions(+), 2 deletions(-)

diff --git a/src/backend/utils/adt/numeric.c b/src/backend/utils/adt/numeric.c
index 344d7137f9..5c33cd3ab0 100644
--- a/src/backend/utils/adt/numeric.c
+++ b/src/backend/utils/adt/numeric.c
@@ -34,6 +34,7 @@
 #include "miscadmin.h"
 #include "nodes/nodeFuncs.h"
 #include "nodes/supportnodes.h"
+#include "optimizer/optimizer.h"
 #include "utils/array.h"
 #include "utils/builtins.h"
 #include "utils/float.h"
@@ -1827,6 +1828,126 @@ generate_series_step_numeric(PG_FUNCTION_ARGS)
 		SRF_RETURN_DONE(funcctx);
 }
 
+/*
+ * Planner support function for generate_series(numeric, numeric [, numeric])
+ */
+Datum
+generate_series_numeric_support(PG_FUNCTION_ARGS)
+{
+	Node	   *rawreq = (Node *) PG_GETARG_POINTER(0);
+	Node	   *ret = NULL;
+
+	if (IsA(rawreq, SupportRequestRows))
+	{
+		/* Try to estimate the number of rows returned */
+		SupportRequestRows *req = (SupportRequestRows *) rawreq;
+
+		if (is_funcclause(req->node))	/* be paranoid */
+		{
+			List	   *args = ((FuncExpr *) req->node)->args;
+			Node	   *arg1,
+					   *arg2,
+					   *arg3;
+
+			/* We can use estimated argument values here */
+			arg1 = estimate_expression_value(req->root, linitial(args));
+			arg2 = estimate_expression_value(req->root, lsecond(args));
+			if (list_length(args) >= 3)
+				arg3 = estimate_expression_value(req->root, lthird(args));
+			else
+				arg3 = NULL;
+
+			/*
+			 * If any argument is constant NULL, we can safely assume that
+			 * zero rows are returned.  Otherwise, if they're all non-NULL
+			 * constants, we can calculate the number of rows that will be
+			 * returned.
+			 */
+			if ((IsA(arg1, Const) &&
+				 ((Const *) arg1)->constisnull) ||
+				(IsA(arg2, Const) &&
+				 ((Const *) arg2)->constisnull) ||
+				(arg3 != NULL && IsA(arg3, Const) &&
+				 ((Const *) arg3)->constisnull))
+			{
+				req->rows = 0;
+				ret = (Node *) req;
+			}
+			else if (IsA(arg1, Const) &&
+					 IsA(arg2, Const) &&
+					 (arg3 == NULL || IsA(arg3, Const)))
+			{
+				Numeric		start_num;
+				Numeric		stop_num;
+				NumericVar	step = const_one;
+
+				/*
+				 * If any argument is NaN or infinity, generate_series() will
+				 * error out, so we needn't produce an estimate.
+				 */
+				start_num = DatumGetNumeric(((Const *) arg1)->constvalue);
+				stop_num = DatumGetNumeric(((Const *) arg2)->constvalue);
+
+				if (NUMERIC_IS_SPECIAL(start_num) ||
+					NUMERIC_IS_SPECIAL(stop_num))
+					PG_RETURN_POINTER(NULL);
+
+				if (arg3)
+				{
+					Numeric		step_num;
+
+					step_num = DatumGetNumeric(((Const *) arg3)->constvalue);
+
+					if (NUMERIC_IS_SPECIAL(step_num))
+						PG_RETURN_POINTER(NULL);
+
+					init_var_from_num(step_num, &step);
+				}
+
+				/*
+				 * The number of rows that will be returned is given by
+				 * floor((stop - start) / step) + 1, if the sign of step
+				 * matches the sign of stop - start.  Otherwise, no rows will
+				 * be returned.
+				 */
+				if (cmp_var(&step, &const_zero) != 0)
+				{
+					NumericVar	start;
+					NumericVar	stop;
+					NumericVar	res;
+
+					init_var_from_num(start_num, &start);
+					init_var_from_num(stop_num, &stop);
+
+					init_var(&res);
+					sub_var(&stop, &start, &res);
+
+					if (step.sign != res.sign)
+					{
+						/* no rows will be returned */
+						req->rows = 0;
+						ret = (Node *) req;
+					}
+					else
+					{
+						if (arg3)
+							div_var(&res, &step, &res, 0, false, false);
+						else
+							trunc_var(&res, 0); /* step = 1 */
+
+						req->rows = numericvar_to_double_no_overflow(&res) + 1;
+						ret = (Node *) req;
+					}
+
+					free_var(&res);
+				}
+			}
+		}
+	}
+
+	PG_RETURN_POINTER(ret);
+}
+
 
 /*
  * Implements the numeric version of the width_bucket() function
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index cbbe8acd38..9575524007 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -8464,13 +8464,18 @@
   proname => 'generate_series_int8_support', prorettype => 'internal',
   proargtypes => 'internal', prosrc => 'generate_series_int8_support' },
 { oid => '3259', descr => 'non-persistent series generator',
-  proname => 'generate_series', prorows => '1000', proretset => 't',
+  proname => 'generate_series', prorows => '1000',
+  prosupport => 'generate_series_numeric_support', proretset => 't',
   prorettype => 'numeric', proargtypes => 'numeric numeric numeric',
   prosrc => 'generate_series_step_numeric' },
 { oid => '3260', descr => 'non-persistent series generator',
-  proname => 'generate_series', prorows => '1000', proretset => 't',
+  proname => 'generate_series', prorows => '1000',
+  prosupport => 'generate_series_numeric_support', proretset => 't',
   prorettype => 'numeric', proargtypes => 'numeric numeric',
   prosrc => 'generate_series_numeric' },
+{ oid => '8405', descr => 'planner support for generate_series',
+  proname => 'generate_series_numeric_support', prorettype => 'internal',
+  proargtypes => 'internal', prosrc => 'generate_series_numeric_support' },
 { oid => '938', descr => 'non-persistent series generator',
   proname => 'generate_series', prorows => '1000',
   prosupport => 'generate_series_timestamp_support', proretset => 't',
diff --git a/src/test/regress/expected/misc_functions.out b/src/test/regress/expected/misc_functions.out
index 36b1201f9f..8c73d2b621 100644
--- a/src/test/regress/expected/misc_functions.out
+++ b/src/test/regress/expected/misc_functions.out
@@ -712,6 +712,71 @@ false, true, false, true);
 -- the support function.
 SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '0 day') g(s);
 ERROR:  step size cannot equal zero
+--
+-- Test the SupportRequestRows support function for generate_series_numeric()
+--
+-- Ensure the row estimate matches the actual rows
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(1.0, 25.0) g(s);$$,
+true, true, false, true);
+                                    explain_mask_costs                                    
+------------------------------------------------------------------------------------------
+ Function Scan on generate_series g  (cost=N..N rows=25 width=N) (actual rows=25 loops=1)
+(1 row)
+
+-- As above but with non-default step
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(1.0, 25.0, 2.0) g(s);$$,
+true, true, false, true);
+                                    explain_mask_costs                                    
+------------------------------------------------------------------------------------------
+ Function Scan on generate_series g  (cost=N..N rows=13 width=N) (actual rows=13 loops=1)
+(1 row)
+
+-- Ensure the estimates match when step is decreasing
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(25.0, 1.0, -1.0) g(s);$$,
+true, true, false, true);
+                                    explain_mask_costs                                    
+------------------------------------------------------------------------------------------
+ Function Scan on generate_series g  (cost=N..N rows=25 width=N) (actual rows=25 loops=1)
+(1 row)
+
+-- Ensure an empty range estimates 1 row
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(25.0, 1.0, 1.0) g(s);$$,
+true, true, false, true);
+                                   explain_mask_costs                                   
+----------------------------------------------------------------------------------------
+ Function Scan on generate_series g  (cost=N..N rows=1 width=N) (actual rows=0 loops=1)
+(1 row)
+
+-- Ensure we get the default row estimate for error cases (infinity/NaN values
+-- and zero step size)
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series('-infinity'::NUMERIC, 'infinity'::NUMERIC, 1.0) g(s);$$,
+false, true, false, true);
+                        explain_mask_costs                         
+-------------------------------------------------------------------
+ Function Scan on generate_series g  (cost=N..N rows=1000 width=N)
+(1 row)
+
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(1.0, 25.0, 'NaN'::NUMERIC) g(s);$$,
+false, true, false, true);
+                        explain_mask_costs                         
+-------------------------------------------------------------------
+ Function Scan on generate_series g  (cost=N..N rows=1000 width=N)
+(1 row)
+
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(25.0, 2.0, 0.0) g(s);$$,
+false, true, false, true);
+                        explain_mask_costs                         
+-------------------------------------------------------------------
+ Function Scan on generate_series g  (cost=N..N rows=1000 width=N)
+(1 row)
+
 -- Test functions for control data
 SELECT count(*) > 0 AS ok FROM pg_control_checkpoint();
  ok 
diff --git a/src/test/regress/sql/misc_functions.sql b/src/test/regress/sql/misc_functions.sql
index b7495d70eb..0a0c6e5a6b 100644
--- a/src/test/regress/sql/misc_functions.sql
+++ b/src/test/regress/sql/misc_functions.sql
@@ -311,6 +311,44 @@ false, true, false, true);
 -- the support function.
 SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '0 day') g(s);
 
+--
+-- Test the SupportRequestRows support function for generate_series_numeric()
+--
+
+-- Ensure the row estimate matches the actual rows
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(1.0, 25.0) g(s);$$,
+true, true, false, true);
+
+-- As above but with non-default step
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(1.0, 25.0, 2.0) g(s);$$,
+true, true, false, true);
+
+-- Ensure the estimates match when step is decreasing
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(25.0, 1.0, -1.0) g(s);$$,
+true, true, false, true);
+
+-- Ensure an empty range estimates 1 row
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(25.0, 1.0, 1.0) g(s);$$,
+true, true, false, true);
+
+-- Ensure we get the default row estimate for error cases (infinity/NaN values
+-- and zero step size)
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series('-infinity'::NUMERIC, 'infinity'::NUMERIC, 1.0) g(s);$$,
+false, true, false, true);
+
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(1.0, 25.0, 'NaN'::NUMERIC) g(s);$$,
+false, true, false, true);
+
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(25.0, 2.0, 0.0) g(s);$$,
+false, true, false, true);
+
 -- Test functions for control data
 SELECT count(*) > 0 AS ok FROM pg_control_checkpoint();
 SELECT count(*) > 0 AS ok FROM pg_control_init();
-- 
2.43.0

Reply via email to