On Fri, 29 Nov 2024 at 13:10, Dean Rasheed <dean.a.rash...@gmail.com> wrote: > > There are a couple more things that I think need tidying up. I'll post an > update when I get back to my computer. >
Here's an update with some cosmetic tidying up, plus a couple of not-so-cosmetic changes: The new #include wasn't in the right place alphabetically (the same is true for the recent timestamp equivalent function). It's not necessary to call init_var() for a variable that you're going to initialise with init_var_from_num(), and it's then not necessary to call free_var() for that variable. It's not necessary to have separate NumericVars for the difference and quotient -- the same variable can be reused. Doing both those things means that there's only one variable to free after the computation, and it can be kept local to if-step-not-zero code block, so there's no need for the "goto cleanup" stuff. It seems worth avoiding div_var() in the 2-argument case, when step = 1. Regards, Dean
From b2de4cdc4f0c2752a734ce9dd2a55d92644ba92e Mon Sep 17 00:00:00 2001 From: Dean Rasheed <dean.a.rash...@gmail.com> Date: Fri, 29 Nov 2024 19:49:28 +0000 Subject: [PATCH v4] Add a planner support function for numeric generate_series(). This allows the planner to estimate the number of rows returned by generate_series(numeric, numeric[, numeric]), when the input values can be estimated at plan time. Song Jinzhou, reviewed by Dean Rasheed and David Rowley. Discussion: https://postgr.es/m/tencent_F43E7F4DD50EF5986D1051DE8DE547910206%40qq.com Discussion: https://postgr.es/m/tencent_1F6D5B9A1545E02FD7D0EE508DFD056DE50A%40qq.com --- src/backend/utils/adt/numeric.c | 121 +++++++++++++++++++ src/include/catalog/pg_proc.dat | 9 +- src/test/regress/expected/misc_functions.out | 65 ++++++++++ src/test/regress/sql/misc_functions.sql | 38 ++++++ 4 files changed, 231 insertions(+), 2 deletions(-) diff --git a/src/backend/utils/adt/numeric.c b/src/backend/utils/adt/numeric.c index 344d7137f9..5c33cd3ab0 100644 --- a/src/backend/utils/adt/numeric.c +++ b/src/backend/utils/adt/numeric.c @@ -34,6 +34,7 @@ #include "miscadmin.h" #include "nodes/nodeFuncs.h" #include "nodes/supportnodes.h" +#include "optimizer/optimizer.h" #include "utils/array.h" #include "utils/builtins.h" #include "utils/float.h" @@ -1827,6 +1828,126 @@ generate_series_step_numeric(PG_FUNCTION_ARGS) SRF_RETURN_DONE(funcctx); } +/* + * Planner support function for generate_series(numeric, numeric [, numeric]) + */ +Datum +generate_series_numeric_support(PG_FUNCTION_ARGS) +{ + Node *rawreq = (Node *) PG_GETARG_POINTER(0); + Node *ret = NULL; + + if (IsA(rawreq, SupportRequestRows)) + { + /* Try to estimate the number of rows returned */ + SupportRequestRows *req = (SupportRequestRows *) rawreq; + + if (is_funcclause(req->node)) /* be paranoid */ + { + List *args = ((FuncExpr *) req->node)->args; + Node *arg1, + *arg2, + *arg3; + + /* We can use estimated argument values here */ + arg1 = estimate_expression_value(req->root, linitial(args)); + arg2 = estimate_expression_value(req->root, lsecond(args)); + if (list_length(args) >= 3) + arg3 = estimate_expression_value(req->root, lthird(args)); + else + arg3 = NULL; + + /* + * If any argument is constant NULL, we can safely assume that + * zero rows are returned. Otherwise, if they're all non-NULL + * constants, we can calculate the number of rows that will be + * returned. + */ + if ((IsA(arg1, Const) && + ((Const *) arg1)->constisnull) || + (IsA(arg2, Const) && + ((Const *) arg2)->constisnull) || + (arg3 != NULL && IsA(arg3, Const) && + ((Const *) arg3)->constisnull)) + { + req->rows = 0; + ret = (Node *) req; + } + else if (IsA(arg1, Const) && + IsA(arg2, Const) && + (arg3 == NULL || IsA(arg3, Const))) + { + Numeric start_num; + Numeric stop_num; + NumericVar step = const_one; + + /* + * If any argument is NaN or infinity, generate_series() will + * error out, so we needn't produce an estimate. + */ + start_num = DatumGetNumeric(((Const *) arg1)->constvalue); + stop_num = DatumGetNumeric(((Const *) arg2)->constvalue); + + if (NUMERIC_IS_SPECIAL(start_num) || + NUMERIC_IS_SPECIAL(stop_num)) + PG_RETURN_POINTER(NULL); + + if (arg3) + { + Numeric step_num; + + step_num = DatumGetNumeric(((Const *) arg3)->constvalue); + + if (NUMERIC_IS_SPECIAL(step_num)) + PG_RETURN_POINTER(NULL); + + init_var_from_num(step_num, &step); + } + + /* + * The number of rows that will be returned is given by + * floor((stop - start) / step) + 1, if the sign of step + * matches the sign of stop - start. Otherwise, no rows will + * be returned. + */ + if (cmp_var(&step, &const_zero) != 0) + { + NumericVar start; + NumericVar stop; + NumericVar res; + + init_var_from_num(start_num, &start); + init_var_from_num(stop_num, &stop); + + init_var(&res); + sub_var(&stop, &start, &res); + + if (step.sign != res.sign) + { + /* no rows will be returned */ + req->rows = 0; + ret = (Node *) req; + } + else + { + if (arg3) + div_var(&res, &step, &res, 0, false, false); + else + trunc_var(&res, 0); /* step = 1 */ + + req->rows = numericvar_to_double_no_overflow(&res) + 1; + ret = (Node *) req; + } + + free_var(&res); + } + } + } + } + + PG_RETURN_POINTER(ret); +} + /* * Implements the numeric version of the width_bucket() function diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index cbbe8acd38..9575524007 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -8464,13 +8464,18 @@ proname => 'generate_series_int8_support', prorettype => 'internal', proargtypes => 'internal', prosrc => 'generate_series_int8_support' }, { oid => '3259', descr => 'non-persistent series generator', - proname => 'generate_series', prorows => '1000', proretset => 't', + proname => 'generate_series', prorows => '1000', + prosupport => 'generate_series_numeric_support', proretset => 't', prorettype => 'numeric', proargtypes => 'numeric numeric numeric', prosrc => 'generate_series_step_numeric' }, { oid => '3260', descr => 'non-persistent series generator', - proname => 'generate_series', prorows => '1000', proretset => 't', + proname => 'generate_series', prorows => '1000', + prosupport => 'generate_series_numeric_support', proretset => 't', prorettype => 'numeric', proargtypes => 'numeric numeric', prosrc => 'generate_series_numeric' }, +{ oid => '8405', descr => 'planner support for generate_series', + proname => 'generate_series_numeric_support', prorettype => 'internal', + proargtypes => 'internal', prosrc => 'generate_series_numeric_support' }, { oid => '938', descr => 'non-persistent series generator', proname => 'generate_series', prorows => '1000', prosupport => 'generate_series_timestamp_support', proretset => 't', diff --git a/src/test/regress/expected/misc_functions.out b/src/test/regress/expected/misc_functions.out index 36b1201f9f..8c73d2b621 100644 --- a/src/test/regress/expected/misc_functions.out +++ b/src/test/regress/expected/misc_functions.out @@ -712,6 +712,71 @@ false, true, false, true); -- the support function. SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '0 day') g(s); ERROR: step size cannot equal zero +-- +-- Test the SupportRequestRows support function for generate_series_numeric() +-- +-- Ensure the row estimate matches the actual rows +SELECT explain_mask_costs($$ +SELECT * FROM generate_series(1.0, 25.0) g(s);$$, +true, true, false, true); + explain_mask_costs +------------------------------------------------------------------------------------------ + Function Scan on generate_series g (cost=N..N rows=25 width=N) (actual rows=25 loops=1) +(1 row) + +-- As above but with non-default step +SELECT explain_mask_costs($$ +SELECT * FROM generate_series(1.0, 25.0, 2.0) g(s);$$, +true, true, false, true); + explain_mask_costs +------------------------------------------------------------------------------------------ + Function Scan on generate_series g (cost=N..N rows=13 width=N) (actual rows=13 loops=1) +(1 row) + +-- Ensure the estimates match when step is decreasing +SELECT explain_mask_costs($$ +SELECT * FROM generate_series(25.0, 1.0, -1.0) g(s);$$, +true, true, false, true); + explain_mask_costs +------------------------------------------------------------------------------------------ + Function Scan on generate_series g (cost=N..N rows=25 width=N) (actual rows=25 loops=1) +(1 row) + +-- Ensure an empty range estimates 1 row +SELECT explain_mask_costs($$ +SELECT * FROM generate_series(25.0, 1.0, 1.0) g(s);$$, +true, true, false, true); + explain_mask_costs +---------------------------------------------------------------------------------------- + Function Scan on generate_series g (cost=N..N rows=1 width=N) (actual rows=0 loops=1) +(1 row) + +-- Ensure we get the default row estimate for error cases (infinity/NaN values +-- and zero step size) +SELECT explain_mask_costs($$ +SELECT * FROM generate_series('-infinity'::NUMERIC, 'infinity'::NUMERIC, 1.0) g(s);$$, +false, true, false, true); + explain_mask_costs +------------------------------------------------------------------- + Function Scan on generate_series g (cost=N..N rows=1000 width=N) +(1 row) + +SELECT explain_mask_costs($$ +SELECT * FROM generate_series(1.0, 25.0, 'NaN'::NUMERIC) g(s);$$, +false, true, false, true); + explain_mask_costs +------------------------------------------------------------------- + Function Scan on generate_series g (cost=N..N rows=1000 width=N) +(1 row) + +SELECT explain_mask_costs($$ +SELECT * FROM generate_series(25.0, 2.0, 0.0) g(s);$$, +false, true, false, true); + explain_mask_costs +------------------------------------------------------------------- + Function Scan on generate_series g (cost=N..N rows=1000 width=N) +(1 row) + -- Test functions for control data SELECT count(*) > 0 AS ok FROM pg_control_checkpoint(); ok diff --git a/src/test/regress/sql/misc_functions.sql b/src/test/regress/sql/misc_functions.sql index b7495d70eb..0a0c6e5a6b 100644 --- a/src/test/regress/sql/misc_functions.sql +++ b/src/test/regress/sql/misc_functions.sql @@ -311,6 +311,44 @@ false, true, false, true); -- the support function. SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '0 day') g(s); +-- +-- Test the SupportRequestRows support function for generate_series_numeric() +-- + +-- Ensure the row estimate matches the actual rows +SELECT explain_mask_costs($$ +SELECT * FROM generate_series(1.0, 25.0) g(s);$$, +true, true, false, true); + +-- As above but with non-default step +SELECT explain_mask_costs($$ +SELECT * FROM generate_series(1.0, 25.0, 2.0) g(s);$$, +true, true, false, true); + +-- Ensure the estimates match when step is decreasing +SELECT explain_mask_costs($$ +SELECT * FROM generate_series(25.0, 1.0, -1.0) g(s);$$, +true, true, false, true); + +-- Ensure an empty range estimates 1 row +SELECT explain_mask_costs($$ +SELECT * FROM generate_series(25.0, 1.0, 1.0) g(s);$$, +true, true, false, true); + +-- Ensure we get the default row estimate for error cases (infinity/NaN values +-- and zero step size) +SELECT explain_mask_costs($$ +SELECT * FROM generate_series('-infinity'::NUMERIC, 'infinity'::NUMERIC, 1.0) g(s);$$, +false, true, false, true); + +SELECT explain_mask_costs($$ +SELECT * FROM generate_series(1.0, 25.0, 'NaN'::NUMERIC) g(s);$$, +false, true, false, true); + +SELECT explain_mask_costs($$ +SELECT * FROM generate_series(25.0, 2.0, 0.0) g(s);$$, +false, true, false, true); + -- Test functions for control data SELECT count(*) > 0 AS ok FROM pg_control_checkpoint(); SELECT count(*) > 0 AS ok FROM pg_control_init(); -- 2.43.0