On Wed, 16 Oct 2024 at 08:43, Andy Fan <zhihuifan1...@163.com> wrote: > > Thanks for the detailed feedback! Here is the rebased version. >
I took another look at this and I think it's in reasonable shape. I'm attaching an update, rebasing it on top of 9be4e5d293. Also it was missing a required update to the meson.build file -- that's the immediate cause of the other cfbot failures. The rest is just cosmetic tidying up, fixing indentation, tweaking comments, and the like. I also hacked on the docs a bit -- the synopsis only listed one of the new function signatures for some reason. After fixing that, I think it's sufficient to just list one usage example. Regards, Dean
From 2a14be071dd2e721e768fdbaa57b096d509773aa Mon Sep 17 00:00:00 2001 From: Dean Rasheed <dean.a.rash...@gmail.com> Date: Wed, 30 Oct 2024 08:41:41 +0000 Subject: [PATCH v3] tablefunc: Add rand_array() functions. These functions return sets of random-length arrays, containing uniformly distributed random values of type integer, bigint, or numeric. Andy Fan, reviewed by Jim Jones, Japin Li, and Dean Rasheed. Discussion: https://postgr.es/m/87plssezpc....@163.com --- contrib/tablefunc/Makefile | 2 +- contrib/tablefunc/expected/tablefunc.out | 66 ++++++++ contrib/tablefunc/meson.build | 1 + contrib/tablefunc/sql/tablefunc.sql | 16 ++ contrib/tablefunc/tablefunc--1.0--1.1.sql | 19 +++ contrib/tablefunc/tablefunc.c | 197 ++++++++++++++++++++++ contrib/tablefunc/tablefunc.control | 2 +- doc/src/sgml/tablefunc.sgml | 66 ++++++++ src/backend/utils/adt/arrayfuncs.c | 1 + src/tools/pgindent/typedefs.list | 1 + 10 files changed, 369 insertions(+), 2 deletions(-) create mode 100644 contrib/tablefunc/tablefunc--1.0--1.1.sql diff --git a/contrib/tablefunc/Makefile b/contrib/tablefunc/Makefile index 191a3a1d38..f0c67308fd 100644 --- a/contrib/tablefunc/Makefile +++ b/contrib/tablefunc/Makefile @@ -3,7 +3,7 @@ MODULES = tablefunc EXTENSION = tablefunc -DATA = tablefunc--1.0.sql +DATA = tablefunc--1.0.sql tablefunc--1.0--1.1.sql PGFILEDESC = "tablefunc - various functions that return tables" REGRESS = tablefunc diff --git a/contrib/tablefunc/expected/tablefunc.out b/contrib/tablefunc/expected/tablefunc.out index ddece79029..9c3f7529d5 100644 --- a/contrib/tablefunc/expected/tablefunc.out +++ b/contrib/tablefunc/expected/tablefunc.out @@ -13,6 +13,72 @@ SELECT avg(normal_rand)::int, count(*) FROM normal_rand(100, 250, 0.2); SELECT avg(normal_rand)::int, count(*) FROM normal_rand(-1, 250, 0.2); ERROR: number of rows cannot be negative -- +-- rand_array() +-- use setseed() to get stable results +-- +SELECT setseed(0); + setseed +--------- + +(1 row) + +SELECT pg_typeof(rand_array), * FROM rand_array(10, 0, 3, 50::int, 80::int); + pg_typeof | rand_array +-----------+------------ + integer[] | {63,71,66} + integer[] | {64} + integer[] | {54} + integer[] | {72,64,60} + integer[] | {75} + integer[] | {53,73} + integer[] | {69} + integer[] | {74,67} + integer[] | {65} + integer[] | {73} +(10 rows) + +SELECT pg_typeof(rand_array), * FROM rand_array(10, 0, 3, 50::bigint, 80::bigint); + pg_typeof | rand_array +-----------+------------ + bigint[] | {} + bigint[] | {59,53} + bigint[] | {72} + bigint[] | {80,79} + bigint[] | {71} + bigint[] | {80,80} + bigint[] | {61,64} + bigint[] | {62,76,80} + bigint[] | {} + bigint[] | {} +(10 rows) + +SELECT pg_typeof(rand_array), * FROM rand_array(10, 0, 3, 50::numeric, 80::numeric); + pg_typeof | rand_array +-----------+------------ + numeric[] | {77,66,73} + numeric[] | {51,53} + numeric[] | {65,54} + numeric[] | {72} + numeric[] | {55,72} + numeric[] | {} + numeric[] | {55,70,64} + numeric[] | {75} + numeric[] | {} + numeric[] | {} +(10 rows) + +-- negative number of tuples +SELECT pg_typeof(rand_array), * FROM rand_array(-1, 0, 3, 50::int, 80::int); +ERROR: number of rows cannot be negative +-- invalid length bounds +SELECT pg_typeof(rand_array), * FROM rand_array(10, -1, 3, 50::int, 80::int); +ERROR: minlen must be greater than or equal to zero +SELECT pg_typeof(rand_array), * FROM rand_array(10, 3, 0, 50::int, 80::int); +ERROR: maxlen must be greater than or equal to minlen +-- invalid value bounds +SELECT pg_typeof(rand_array), * FROM rand_array(10, 0, 3, 80::int, 50::int); +ERROR: lower bound must be less than or equal to upper bound +-- -- crosstab() -- CREATE TABLE ct(id int, rowclass text, rowid text, attribute text, val text); diff --git a/contrib/tablefunc/meson.build b/contrib/tablefunc/meson.build index dccf3b3758..f794163ecc 100644 --- a/contrib/tablefunc/meson.build +++ b/contrib/tablefunc/meson.build @@ -18,6 +18,7 @@ contrib_targets += tablefunc install_data( 'tablefunc--1.0.sql', + 'tablefunc--1.0--1.1.sql', 'tablefunc.control', kwargs: contrib_data_args, ) diff --git a/contrib/tablefunc/sql/tablefunc.sql b/contrib/tablefunc/sql/tablefunc.sql index 0fb8e40de2..f83ccb7ec4 100644 --- a/contrib/tablefunc/sql/tablefunc.sql +++ b/contrib/tablefunc/sql/tablefunc.sql @@ -8,6 +8,22 @@ SELECT avg(normal_rand)::int, count(*) FROM normal_rand(100, 250, 0.2); -- negative number of tuples SELECT avg(normal_rand)::int, count(*) FROM normal_rand(-1, 250, 0.2); +-- +-- rand_array() +-- use setseed() to get stable results +-- +SELECT setseed(0); +SELECT pg_typeof(rand_array), * FROM rand_array(10, 0, 3, 50::int, 80::int); +SELECT pg_typeof(rand_array), * FROM rand_array(10, 0, 3, 50::bigint, 80::bigint); +SELECT pg_typeof(rand_array), * FROM rand_array(10, 0, 3, 50::numeric, 80::numeric); +-- negative number of tuples +SELECT pg_typeof(rand_array), * FROM rand_array(-1, 0, 3, 50::int, 80::int); +-- invalid length bounds +SELECT pg_typeof(rand_array), * FROM rand_array(10, -1, 3, 50::int, 80::int); +SELECT pg_typeof(rand_array), * FROM rand_array(10, 3, 0, 50::int, 80::int); +-- invalid value bounds +SELECT pg_typeof(rand_array), * FROM rand_array(10, 0, 3, 80::int, 50::int); + -- -- crosstab() -- diff --git a/contrib/tablefunc/tablefunc--1.0--1.1.sql b/contrib/tablefunc/tablefunc--1.0--1.1.sql new file mode 100644 index 0000000000..18e4aed826 --- /dev/null +++ b/contrib/tablefunc/tablefunc--1.0--1.1.sql @@ -0,0 +1,19 @@ +/* contrib/tablefunc/tablefunc--1.0--1.1.sql */ + +-- complain if script is sourced in psql, rather than via ALTER EXTENSION +\echo Use "ALTER EXTENSION tablefunc UPDATE TO '1.1'" to load this file. \quit + +CREATE FUNCTION rand_array(numvals int, minlen int, maxlen int, minval int, maxval int) +RETURNS setof int[] +AS 'MODULE_PATHNAME','rand_array_int' +LANGUAGE C VOLATILE STRICT; + +CREATE FUNCTION rand_array(numvals int, minlen int, maxlen int, minval bigint, maxval bigint) +RETURNS setof bigint[] +AS 'MODULE_PATHNAME','rand_array_bigint' +LANGUAGE C VOLATILE STRICT; + +CREATE FUNCTION rand_array(numvals int, minlen int, maxlen int, minval numeric, maxval numeric) +RETURNS setof numeric[] +AS 'MODULE_PATHNAME','rand_array_numeric' +LANGUAGE C VOLATILE STRICT; diff --git a/contrib/tablefunc/tablefunc.c b/contrib/tablefunc/tablefunc.c index 4b6ae2fe5c..ff5670d122 100644 --- a/contrib/tablefunc/tablefunc.c +++ b/contrib/tablefunc/tablefunc.c @@ -42,7 +42,9 @@ #include "funcapi.h" #include "lib/stringinfo.h" #include "miscadmin.h" +#include "utils/array.h" #include "utils/builtins.h" +#include "utils/fmgroids.h" PG_MODULE_MAGIC; @@ -83,6 +85,9 @@ static void build_tuplestore_recursively(char *key_fld, AttInMetadata *attinmeta, Tuplestorestate *tupstore); +/* + * cross-call data structure for SRF normal_rand() + */ typedef struct { float8 mean; /* mean of the distribution */ @@ -91,6 +96,15 @@ typedef struct bool use_carry; /* use second generated value */ } normal_rand_fctx; +/* + * cross-call data structure for SRF rand_array() + */ +typedef struct +{ + FunctionCallInfo random_len_fcinfo; /* random array length function */ + FunctionCallInfo random_val_fcinfo; /* random array elem value function */ +} rand_array_fctx; + #define xpfree(var_) \ do { \ if (var_ != NULL) \ @@ -313,6 +327,189 @@ get_normal_pair(float8 *x1, float8 *x2) } } +/* + * rand_array_internal() + * Return the requested number of random-length arrays, filled with + * random values of the specified datatype. + * + * Inputs: + * fcinfo: includes the number of arrays to return, minlen and maxlen array + * length bounds, and minval and maxval array element bounds. + * datatype: the datatype of the array elements. + * + * returns setof datatype[]. + */ +static Datum +rand_array_internal(FunctionCallInfo fcinfo, Oid datatype) +{ + FuncCallContext *funcctx; + rand_array_fctx *fctx; + + /* stuff done only on the first call of the function */ + if (SRF_IS_FIRSTCALL()) + { + MemoryContext oldcontext; + int32 num_tuples; + int32 minlen; + int32 maxlen; + Datum minval; + Datum maxval; + Oid random_fn_oid; + FmgrInfo *random_len_flinfo; + FunctionCallInfo random_len_fcinfo; + FmgrInfo *random_val_flinfo; + FunctionCallInfo random_val_fcinfo; + + /* create a function context for cross-call persistence */ + funcctx = SRF_FIRSTCALL_INIT(); + + /* + * switch to memory context appropriate for multiple function calls + */ + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + + /* total number of tuples (arrays) to be returned */ + num_tuples = PG_GETARG_INT32(0); + if (num_tuples < 0) + ereport(ERROR, + errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("number of rows cannot be negative")); + funcctx->max_calls = num_tuples; + + /* minimum length of arrays returned */ + minlen = PG_GETARG_INT32(1); + if (minlen < 0) + ereport(ERROR, + errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("minlen must be greater than or equal to zero")); + + /* maximum length of arrays returned */ + maxlen = PG_GETARG_INT32(2); + if (maxlen < minlen) + ereport(ERROR, + errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("maxlen must be greater than or equal to minlen")); + + /* minimum value of array elements */ + minval = PG_GETARG_DATUM(3); + + /* maximum value of array elements */ + maxval = PG_GETARG_DATUM(4); + + /* function to return each array element */ + switch (datatype) + { + case INT4OID: + random_fn_oid = F_RANDOM_INT4_INT4; + break; + case INT8OID: + random_fn_oid = F_RANDOM_INT8_INT8; + break; + case NUMERICOID: + random_fn_oid = F_RANDOM_NUMERIC_NUMERIC; + break; + default: + elog(ERROR, "unsupported type %u for rand_array function", + datatype); + random_fn_oid = 0; /* keep compiler quiet */ + break; + } + + /* allocate memory for user context */ + fctx = (rand_array_fctx *) palloc(sizeof(rand_array_fctx)); + + /* + * Use fctx to keep track of upper and lower array length bounds and + * upper and lower array element value bounds from call to call. These + * bounds are held in the function call info for the array length and + * array element functions. + */ + /* array length function: random(int, int) */ + random_len_flinfo = (FmgrInfo *) palloc0(sizeof(FmgrInfo)); + fmgr_info(F_RANDOM_INT4_INT4, random_len_flinfo); + + random_len_fcinfo = (FunctionCallInfo) palloc0(SizeForFunctionCallInfo(2)); + InitFunctionCallInfoData(*random_len_fcinfo, random_len_flinfo, 2, + InvalidOid, NULL, NULL); + + random_len_fcinfo->args[0].value = Int32GetDatum(minlen); + random_len_fcinfo->args[0].isnull = false; + random_len_fcinfo->args[1].value = Int32GetDatum(maxlen); + random_len_fcinfo->args[1].isnull = false; + + /* array element function: random(minval, maxval) for datatype */ + random_val_flinfo = (FmgrInfo *) palloc0(sizeof(FmgrInfo)); + fmgr_info(random_fn_oid, random_val_flinfo); + + random_val_fcinfo = (FunctionCallInfo) palloc0(SizeForFunctionCallInfo(2)); + InitFunctionCallInfoData(*random_val_fcinfo, random_val_flinfo, 2, + InvalidOid, NULL, NULL); + + random_val_fcinfo->args[0].value = minval; + random_val_fcinfo->args[0].isnull = false; + random_val_fcinfo->args[1].value = maxval; + random_val_fcinfo->args[1].isnull = false; + + /* store in SRF user context */ + fctx->random_len_fcinfo = random_len_fcinfo; + fctx->random_val_fcinfo = random_val_fcinfo; + + funcctx->user_fctx = fctx; + + MemoryContextSwitchTo(oldcontext); + } + + /* stuff done on every call of the function */ + funcctx = SRF_PERCALL_SETUP(); + + fctx = funcctx->user_fctx; + + if (funcctx->call_cntr < funcctx->max_calls) + { + int array_len; + Datum *array_elems; + ArrayType *array; + + /* + * Return a(nother) random-length array, filled with random values. + */ + array_len = DatumGetInt32(FunctionCallInvoke(fctx->random_len_fcinfo)); + + array_elems = palloc(array_len * sizeof(Datum)); + + for (int i = 0; i < array_len; i++) + array_elems[i] = FunctionCallInvoke(fctx->random_val_fcinfo); + + array = construct_array_builtin(array_elems, array_len, datatype); + + SRF_RETURN_NEXT(funcctx, PointerGetDatum(array)); + } + else + /* do when there is no more left */ + SRF_RETURN_DONE(funcctx); +} + +PG_FUNCTION_INFO_V1(rand_array_int); +Datum +rand_array_int(PG_FUNCTION_ARGS) +{ + return rand_array_internal(fcinfo, INT4OID); +} + +PG_FUNCTION_INFO_V1(rand_array_bigint); +Datum +rand_array_bigint(PG_FUNCTION_ARGS) +{ + return rand_array_internal(fcinfo, INT8OID); +} + +PG_FUNCTION_INFO_V1(rand_array_numeric); +Datum +rand_array_numeric(PG_FUNCTION_ARGS) +{ + return rand_array_internal(fcinfo, NUMERICOID); +} + /* * crosstab - create a crosstab of rowids and values columns from a * SQL statement returning one rowid column, one category column, diff --git a/contrib/tablefunc/tablefunc.control b/contrib/tablefunc/tablefunc.control index 7b25d16170..9cc6222a4f 100644 --- a/contrib/tablefunc/tablefunc.control +++ b/contrib/tablefunc/tablefunc.control @@ -1,6 +1,6 @@ # tablefunc extension comment = 'functions that manipulate whole tables, including crosstab' -default_version = '1.0' +default_version = '1.1' module_pathname = '$libdir/tablefunc' relocatable = true trusted = true diff --git a/doc/src/sgml/tablefunc.sgml b/doc/src/sgml/tablefunc.sgml index e10fe7009d..a06ba10e20 100644 --- a/doc/src/sgml/tablefunc.sgml +++ b/doc/src/sgml/tablefunc.sgml @@ -53,6 +53,21 @@ </para></entry> </row> + <row> + <entry role="func_table_entry"><para role="func_signature"> + <function>rand_array</function> ( <parameter>numvals</parameter> <type>integer</type>, <parameter>minlen</parameter> <type>integer</type>, <parameter>maxlen</parameter> <type>integer</type>, <parameter>minval</parameter> <type><replaceable>numeric_type</replaceable></type>, <parameter>maxval</parameter> <type><replaceable>numeric_type</replaceable></type> ) + <returnvalue>setof <replaceable>numeric_type[]</replaceable></returnvalue> + </para> + <para> + Produces a set of random-length <replaceable>numeric_type[]</replaceable> + arrays of uniformly distributed random values, using the + <function>random()</function> functions listed in + <xref linkend="functions-math-random-table"/>; available for + <type>integer</type>, <type>bigint</type>, and <type>numeric</type>. + </para> + </entry> + </row> + <row> <entry role="func_table_entry"><para role="func_signature"> <function>crosstab</function> ( <parameter>sql</parameter> <type>text</type> ) @@ -166,6 +181,57 @@ test=# SELECT * FROM normal_rand(1000, 5, 3); </screen> </sect3> + <sect3 id="tablefunc-functions-rand-array"> + <title><function>rand_array</function></title> + + <indexterm> + <primary>rand_array</primary> + </indexterm> + +<synopsis> +rand_array(int numvals, int minlen, int maxlen, int minval, int maxval) returns setof int[] +rand_array(int numvals, int minlen, int maxlen, bigint minval, bigint maxval) returns setof bigint[] +rand_array(int numvals, int minlen, int maxlen, numeric minval, numeric maxval) returns setof numeric[] +</synopsis> + + <para> + <function>rand_array</function> produces a set of random-length arrays of + uniformly distributed random values. + </para> + + <para> + <parameter>numvals</parameter> is the number of arrays to be returned + by the function. <parameter>minlen</parameter> and + <parameter>maxlen</parameter> are the minumum and maximum lengths of each + array, and <parameter>minval</parameter> and <parameter>maxval</parameter> + are the minumum and maximum values of each array element. + </para> + + <para> + For example, this call requests 1000 arrays with lengths between 0 and 4 + and elements of type <type>numeric</type> with values between + <literal>-1.0</literal> and <literal>1.0</literal>: + </para> + +<screen> +test=# SELECT * FROM rand_array(1000, 0, 4, -1.0, 1.0); + rand_array +----------------------- + {1.0,-1.0,-0.7} + {1.0} + {-0.6,-0.7} + {-0.9,-0.9,-0.6,0.5} + {} + . + . + . + {0.0,-0.8} + {} + {-0.9,-0.9} +(1000 rows) +</screen> + </sect3> + <sect3 id="tablefunc-functions-crosstab-text"> <title><function>crosstab(text)</function></title> diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c index a715e7e0b8..4255aede2b 100644 --- a/src/backend/utils/adt/arrayfuncs.c +++ b/src/backend/utils/adt/arrayfuncs.c @@ -3442,6 +3442,7 @@ construct_array_builtin(Datum *elems, int nelems, Oid elmtype) break; case TEXTOID: + case NUMERICOID: elmlen = -1; elmbyval = false; elmalign = TYPALIGN_INT; diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 171a7dd5d2..cf1d1757d4 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -3834,6 +3834,7 @@ qsort_comparator query_pathkeys_callback radius_attribute radius_packet +rand_array_fctx rangeTableEntry_used_context rank_context rbt_allocfunc -- 2.43.0