I wrote: > There's a considerable amount of follow-up work that ought to happen > now to make use of these capabilities for places that have been > pain points in the past, such as generate_series() and unnest(). > But I haven't touched that yet.
Attached is an 0004 that makes a stab at providing some intelligence for unnest() and the integer cases of generate_series(). This only affects one plan choice in the existing regression tests; I tweaked that test to keep the plan the same. I didn't add new test cases demonstrating the functionality, since it's a bit hard to show it directly within the constraints of EXPLAIN (COSTS OFF). We could do something along the lines of the quick-hack rowcount test in 0003, perhaps, but that's pretty indirect. Looking at this, I'm dissatisfied with the amount of new #include's being dragged into datatype-specific .c files. I don't really want to end up with most of utils/adt/ having dependencies on planner data structures, but that's where we would be headed. I can think of a couple of possibilities: * Instead of putting support functions beside their target function, group all the core's support functions into one new .c file. I'm afraid this would lead to the reverse problem of having to import lots of datatype-private info into that file. * Try to refactor the planner's .h files so that there's just one "external use" header providing stuff like estimate_expression_value, while keeping PlannerInfo as an opaque struct. Then importing that into utils/adt/ files would not represent such a big dependency footprint. I find the second choice more appealing, though it's getting a bit far afield from where this started. OTOH, lots of other header refactoring is going on right now, so why not ... Thoughts? regards, tom lane
diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c index e457d81..14cc202 100644 --- a/src/backend/utils/adt/arrayfuncs.c +++ b/src/backend/utils/adt/arrayfuncs.c @@ -22,12 +22,15 @@ #include "catalog/pg_type.h" #include "funcapi.h" #include "libpq/pqformat.h" +#include "nodes/supportnodes.h" +#include "optimizer/clauses.h" #include "utils/array.h" #include "utils/arrayaccess.h" #include "utils/builtins.h" #include "utils/datum.h" #include "utils/lsyscache.h" #include "utils/memutils.h" +#include "utils/selfuncs.h" #include "utils/typcache.h" @@ -6026,6 +6029,36 @@ array_unnest(PG_FUNCTION_ARGS) } } +/* + * Planner support function for array_unnest(anyarray) + */ +Datum +array_unnest_support(PG_FUNCTION_ARGS) +{ + Node *rawreq = (Node *) PG_GETARG_POINTER(0); + Node *ret = NULL; + + if (IsA(rawreq, SupportRequestRows)) + { + /* Try to estimate the number of rows returned */ + SupportRequestRows *req = (SupportRequestRows *) rawreq; + + if (is_funcclause(req->node)) /* be paranoid */ + { + List *args = ((FuncExpr *) req->node)->args; + Node *arg1; + + /* We can use estimated argument values here */ + arg1 = estimate_expression_value(req->root, linitial(args)); + + req->rows = estimate_array_length(arg1); + ret = (Node *) req; + } + } + + PG_RETURN_POINTER(ret); +} + /* * array_replace/array_remove support diff --git a/src/backend/utils/adt/int.c b/src/backend/utils/adt/int.c index fd82a83..263920c 100644 --- a/src/backend/utils/adt/int.c +++ b/src/backend/utils/adt/int.c @@ -30,11 +30,14 @@ #include <ctype.h> #include <limits.h> +#include <math.h> #include "catalog/pg_type.h" #include "common/int.h" #include "funcapi.h" #include "libpq/pqformat.h" +#include "nodes/supportnodes.h" +#include "optimizer/clauses.h" #include "utils/array.h" #include "utils/builtins.h" @@ -1427,3 +1430,73 @@ generate_series_step_int4(PG_FUNCTION_ARGS) /* do when there is no more left */ SRF_RETURN_DONE(funcctx); } + +/* + * Planner support function for generate_series(int4, int4 [, int4]) + */ +Datum +generate_series_int4_support(PG_FUNCTION_ARGS) +{ + Node *rawreq = (Node *) PG_GETARG_POINTER(0); + Node *ret = NULL; + + if (IsA(rawreq, SupportRequestRows)) + { + /* Try to estimate the number of rows returned */ + SupportRequestRows *req = (SupportRequestRows *) rawreq; + + if (is_funcclause(req->node)) /* be paranoid */ + { + List *args = ((FuncExpr *) req->node)->args; + Node *arg1, + *arg2, + *arg3; + + /* We can use estimated argument values here */ + arg1 = estimate_expression_value(req->root, linitial(args)); + arg2 = estimate_expression_value(req->root, lsecond(args)); + if (list_length(args) >= 3) + arg3 = estimate_expression_value(req->root, lthird(args)); + else + arg3 = NULL; + + /* + * If any argument is constant NULL, we can safely assume that + * zero rows are returned. Otherwise, if they're all non-NULL + * constants, we can calculate the number of rows that will be + * returned. Use double arithmetic to avoid overflow hazards. + */ + if ((IsA(arg1, Const) && + ((Const *) arg1)->constisnull) || + (IsA(arg2, Const) && + ((Const *) arg2)->constisnull) || + (arg3 != NULL && IsA(arg3, Const) && + ((Const *) arg3)->constisnull)) + { + req->rows = 0; + ret = (Node *) req; + } + else if (IsA(arg1, Const) && + IsA(arg2, Const) && + (arg3 == NULL || IsA(arg3, Const))) + { + double start, + finish, + step; + + start = DatumGetInt32(((Const *) arg1)->constvalue); + finish = DatumGetInt32(((Const *) arg2)->constvalue); + step = arg3 ? DatumGetInt32(((Const *) arg3)->constvalue) : 1; + + /* This equation works for either sign of step */ + if (step != 0) + { + req->rows = floor((finish - start + step) / step); + ret = (Node *) req; + } + } + } + } + + PG_RETURN_POINTER(ret); +} diff --git a/src/backend/utils/adt/int8.c b/src/backend/utils/adt/int8.c index d16cc9e..5157de4 100644 --- a/src/backend/utils/adt/int8.c +++ b/src/backend/utils/adt/int8.c @@ -20,6 +20,8 @@ #include "common/int.h" #include "funcapi.h" #include "libpq/pqformat.h" +#include "nodes/supportnodes.h" +#include "optimizer/clauses.h" #include "utils/int8.h" #include "utils/builtins.h" @@ -1373,3 +1375,73 @@ generate_series_step_int8(PG_FUNCTION_ARGS) /* do when there is no more left */ SRF_RETURN_DONE(funcctx); } + +/* + * Planner support function for generate_series(int8, int8 [, int8]) + */ +Datum +generate_series_int8_support(PG_FUNCTION_ARGS) +{ + Node *rawreq = (Node *) PG_GETARG_POINTER(0); + Node *ret = NULL; + + if (IsA(rawreq, SupportRequestRows)) + { + /* Try to estimate the number of rows returned */ + SupportRequestRows *req = (SupportRequestRows *) rawreq; + + if (is_funcclause(req->node)) /* be paranoid */ + { + List *args = ((FuncExpr *) req->node)->args; + Node *arg1, + *arg2, + *arg3; + + /* We can use estimated argument values here */ + arg1 = estimate_expression_value(req->root, linitial(args)); + arg2 = estimate_expression_value(req->root, lsecond(args)); + if (list_length(args) >= 3) + arg3 = estimate_expression_value(req->root, lthird(args)); + else + arg3 = NULL; + + /* + * If any argument is constant NULL, we can safely assume that + * zero rows are returned. Otherwise, if they're all non-NULL + * constants, we can calculate the number of rows that will be + * returned. Use double arithmetic to avoid overflow hazards. + */ + if ((IsA(arg1, Const) && + ((Const *) arg1)->constisnull) || + (IsA(arg2, Const) && + ((Const *) arg2)->constisnull) || + (arg3 != NULL && IsA(arg3, Const) && + ((Const *) arg3)->constisnull)) + { + req->rows = 0; + ret = (Node *) req; + } + else if (IsA(arg1, Const) && + IsA(arg2, Const) && + (arg3 == NULL || IsA(arg3, Const))) + { + double start, + finish, + step; + + start = DatumGetInt64(((Const *) arg1)->constvalue); + finish = DatumGetInt64(((Const *) arg2)->constvalue); + step = arg3 ? DatumGetInt64(((Const *) arg3)->constvalue) : 1; + + /* This equation works for either sign of step */ + if (step != 0) + { + req->rows = floor((finish - start + step) / step); + ret = (Node *) req; + } + } + } + } + + PG_RETURN_POINTER(ret); +} diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index e5cb5bb..039b596 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -1530,9 +1530,12 @@ proargtypes => 'anyelement _int4 _int4', prosrc => 'array_fill_with_lower_bounds' }, { oid => '2331', descr => 'expand array to set of rows', - proname => 'unnest', prorows => '100', proretset => 't', - prorettype => 'anyelement', proargtypes => 'anyarray', + proname => 'unnest', prorows => '100', prosupport => 'array_unnest_support', + proretset => 't', prorettype => 'anyelement', proargtypes => 'anyarray', prosrc => 'array_unnest' }, +{ oid => '3996', descr => 'planner support for array_unnest', + proname => 'array_unnest_support', prorettype => 'internal', + proargtypes => 'internal', prosrc => 'array_unnest_support' }, { oid => '3167', descr => 'remove any occurrences of an element from an array', proname => 'array_remove', proisstrict => 'f', prorettype => 'anyarray', @@ -7536,21 +7539,31 @@ # non-persistent series generator { oid => '1066', descr => 'non-persistent series generator', - proname => 'generate_series', prorows => '1000', proretset => 't', + proname => 'generate_series', prorows => '1000', + prosupport => 'generate_series_int4_support', proretset => 't', prorettype => 'int4', proargtypes => 'int4 int4 int4', prosrc => 'generate_series_step_int4' }, { oid => '1067', descr => 'non-persistent series generator', - proname => 'generate_series', prorows => '1000', proretset => 't', + proname => 'generate_series', prorows => '1000', + prosupport => 'generate_series_int4_support', proretset => 't', prorettype => 'int4', proargtypes => 'int4 int4', prosrc => 'generate_series_int4' }, +{ oid => '3994', descr => 'planner support for generate_series', + proname => 'generate_series_int4_support', prorettype => 'internal', + proargtypes => 'internal', prosrc => 'generate_series_int4_support' }, { oid => '1068', descr => 'non-persistent series generator', - proname => 'generate_series', prorows => '1000', proretset => 't', + proname => 'generate_series', prorows => '1000', + prosupport => 'generate_series_int8_support', proretset => 't', prorettype => 'int8', proargtypes => 'int8 int8 int8', prosrc => 'generate_series_step_int8' }, { oid => '1069', descr => 'non-persistent series generator', - proname => 'generate_series', prorows => '1000', proretset => 't', + proname => 'generate_series', prorows => '1000', + prosupport => 'generate_series_int8_support', proretset => 't', prorettype => 'int8', proargtypes => 'int8 int8', prosrc => 'generate_series_int8' }, +{ oid => '3995', descr => 'planner support for generate_series', + proname => 'generate_series_int8_support', prorettype => 'internal', + proargtypes => 'internal', prosrc => 'generate_series_int8_support' }, { oid => '3259', descr => 'non-persistent series generator', proname => 'generate_series', prorows => '1000', proretset => 't', prorettype => 'numeric', proargtypes => 'numeric numeric numeric', diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out index 588d069..4056afa 100644 --- a/src/test/regress/expected/subselect.out +++ b/src/test/regress/expected/subselect.out @@ -904,7 +904,7 @@ select * from int4_tbl where -- explain (verbose, costs off) select * from int4_tbl o where (f1, f1) in - (select f1, generate_series(1,2) / 10 g from int4_tbl i group by f1); + (select f1, generate_series(1,50) / 10 g from int4_tbl i group by f1); QUERY PLAN ------------------------------------------------------------------- Nested Loop Semi Join @@ -918,9 +918,9 @@ select * from int4_tbl o where (f1, f1) in Output: "ANY_subquery".f1, "ANY_subquery".g Filter: ("ANY_subquery".f1 = "ANY_subquery".g) -> Result - Output: i.f1, ((generate_series(1, 2)) / 10) + Output: i.f1, ((generate_series(1, 50)) / 10) -> ProjectSet - Output: generate_series(1, 2), i.f1 + Output: generate_series(1, 50), i.f1 -> HashAggregate Output: i.f1 Group Key: i.f1 @@ -929,7 +929,7 @@ select * from int4_tbl o where (f1, f1) in (19 rows) select * from int4_tbl o where (f1, f1) in - (select f1, generate_series(1,2) / 10 g from int4_tbl i group by f1); + (select f1, generate_series(1,50) / 10 g from int4_tbl i group by f1); f1 ---- 0 diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql index 843f511..ccbe8a1 100644 --- a/src/test/regress/sql/subselect.sql +++ b/src/test/regress/sql/subselect.sql @@ -498,9 +498,9 @@ select * from int4_tbl where -- explain (verbose, costs off) select * from int4_tbl o where (f1, f1) in - (select f1, generate_series(1,2) / 10 g from int4_tbl i group by f1); + (select f1, generate_series(1,50) / 10 g from int4_tbl i group by f1); select * from int4_tbl o where (f1, f1) in - (select f1, generate_series(1,2) / 10 g from int4_tbl i group by f1); + (select f1, generate_series(1,50) / 10 g from int4_tbl i group by f1); -- -- check for over-optimization of whole-row Var referencing an Append plan