sorry little bit fixed patch
Pavel 2010/9/23 Pavel Stehule <pavel.steh...@gmail.com>: > Hello > > I moved a "median" function to core. > > + doc part > + regress test > > Regards > > Pavel Stehule > > > 2010/9/20 Hitoshi Harada <umi.tan...@gmail.com>: >> 2010/8/19 Pavel Stehule <pavel.steh...@gmail.com>: >>> Hello >>> >>> I am sending a prototype implementation of functions median and >>> percentile. This implementation is very simple and I moved it to >>> contrib for this moment - it is more easy maintainable. Later I'll >>> move it to core. >> >> I've reviewed this patch. >> >> * The patch can apply cleanly and make doesn't print any errors nor >> warnings. But it doesn't touch contrib/Makefile so I had to make by >> changing dir to contrib/median. >> >> * Cosmetic coding style should be more appropriate, including trailing >> white spaces and indentation positions. >> >> * Since these two aggregates use tuplesort inside it, there're >> possible risk to cause out of memory in normal use case. I don't think >> this fact is critical, but at least some notation should be referred >> in docs. >> >> * It doesn't contain any document nor regression tests. >> >> * They should be callable in window function context; for example >> >> contrib_regression=# select median(a) over (order by a) from t1; >> ERROR: invalid tuplesort state >> >> or at least user-friend message should be printed. >> >> * The returned type is controversy. median(int) returns float8 as the >> patch intended, but avg(int) returns numeric. AFAIK only avg(float8) >> returns float8. >> >> * percentile() is more problematic; first, the second argument for the >> aggregate takes N of "N%ile" as int, like 50 if you want 50%ile, but >> it doesn't care about negative values or more than 100. In addition, >> the second argument is taken at the first non-NULL value of the first >> argument, but the second argument is semantically constant. For >> example, for (1.. 10) value of a in table t1, >> >> contrib_regression=# select percentile(a, a * 10 order by a) from t1; >> percentile >> ------------ >> 1 >> (1 row) >> >> contrib_regression=# select percentile(a, a * 10 order by a desc) from t1; >> percentile >> ------------ >> 10 >> (1 row) >> >> and if the argument comes from the subquery which doesn't contain >> ORDER BY clause, you cannot predict the result. >> >> That's all of my quick review up to now. >> >> Regards, >> >> -- >> Hitoshi Harada >> >
*** ./doc/src/sgml/func.sgml.orig 2010-08-17 06:37:20.000000000 +0200 --- ./doc/src/sgml/func.sgml 2010-09-23 15:03:10.021576906 +0200 *************** *** 10304,10309 **** --- 10304,10329 ---- <row> <entry> + <indexterm> + <primary>Arithmetic median</primary> + <secondary>median</secondary> + </indexterm> + <function>median(<replaceable class="parameter">expression</replaceable>)</function> + </entry> + <entry> + <type>smallint</type>, <type>int</type>, + <type>bigint</type>, <type>real</type>, <type>double + precision</type>, or <type>numeric</type> + </entry> + <entry> + <type>double precision</type> for floating-point arguments, + otherwise <type>numeric</type> + </entry> + <entry>arithmetic median</entry> + </row> + + <row> + <entry> <function>regr_avgx(<replaceable class="parameter">Y</replaceable>, <replaceable class="parameter">X</replaceable>)</function> </entry> <entry> *** ./src/backend/utils/adt/numeric.c.orig 2010-08-04 19:33:09.000000000 +0200 --- ./src/backend/utils/adt/numeric.c 2010-09-23 15:24:04.025453940 +0200 *************** *** 30,39 **** --- 30,42 ---- #include "catalog/pg_type.h" #include "libpq/pqformat.h" #include "miscadmin.h" + #include "parser/parse_coerce.h" + #include "parser/parse_oper.h" #include "utils/array.h" #include "utils/builtins.h" #include "utils/int8.h" #include "utils/numeric.h" + #include "utils/tuplesort.h" /* ---------- * Uncomment the following to enable compilation of dump_numeric() *************** *** 144,149 **** --- 147,161 ---- union NumericChoice choice; /* choice of format */ }; + /* + * used as type of state variable median's function + */ + typedef struct + { + int nelems; /* number of valid entries */ + Tuplesortstate *sortstate; + FmgrInfo cast_func_finfo; + } MedianAggState; /* * Interpretation of high bits. *************** *** 6173,6175 **** --- 6185,6467 ---- var->digits = digits; var->ndigits = ndigits; } + + static MedianAggState * + makeMedianAggState(FunctionCallInfo fcinfo, Oid valtype, Oid targettype) + { + MemoryContext oldctx; + MemoryContext aggcontext; + MedianAggState *aggstate; + Oid sortop, + castfunc; + CoercionPathType pathtype; + + /* + * We cannot to allow a median function under WindowAgg State. + * This content needs a repetetive a calling of final function, + * what isn't possible now with using a tuplestore. + * tuplesort_performsort can be called only once, and some has + * to call a tuplesort_end. + */ + if (fcinfo->context && IsA(fcinfo->context, WindowAggState)) + { + /* cannot be called inside like windowAggregates */ + elog(ERROR, "median_transfn called as windows aggregates"); + } + + if (!AggCheckCallContext(fcinfo, &aggcontext)) + { + /* cannot be called directly because of internal-type argument */ + elog(ERROR, "median_transfn called in non-aggregate context"); + } + + oldctx = MemoryContextSwitchTo(aggcontext); + + aggstate = (MedianAggState *) palloc0(sizeof(MedianAggState)); + + valtype = get_fn_expr_argtype(fcinfo->flinfo, 1); + get_sort_group_operators(valtype, + true, false, false, + &sortop, NULL, NULL); + + aggstate->sortstate = tuplesort_begin_datum(valtype, + sortop, + SORTBY_NULLS_DEFAULT, + work_mem, true); + + MemoryContextSwitchTo(oldctx); + + if (valtype != targettype) + { + /* find a cast function */ + pathtype = find_coercion_pathway(targettype, valtype, + COERCION_EXPLICIT, + &castfunc); + if (pathtype == COERCION_PATH_FUNC) + { + Assert(OidIsValid(castfunc)); + fmgr_info_cxt(castfunc, &aggstate->cast_func_finfo, + aggcontext); + } + else if (pathtype == COERCION_PATH_RELABELTYPE) + { + aggstate->cast_func_finfo.fn_oid = InvalidOid; + } + else + elog(ERROR, "no conversion function from %s %s", + format_type_be(valtype), + format_type_be(targettype)); + } + + return aggstate; + } + + /* + * append a non NULL value to tuplesort + */ + static Datum + common_median_transfn(FunctionCallInfo fcinfo, Oid typoid, Oid targetoid) + { + MedianAggState *aggstate; + + aggstate = PG_ARGISNULL(0) ? NULL : (MedianAggState *) PG_GETARG_POINTER(0); + + if (!PG_ARGISNULL(1)) + { + if (aggstate == NULL) + aggstate = makeMedianAggState(fcinfo, typoid, targetoid); + + tuplesort_putdatum(aggstate->sortstate, PG_GETARG_DATUM(1), false); + aggstate->nelems++; + } + + PG_RETURN_POINTER(aggstate); + } + + /* + * just wrappers to be opr sanity checks happy + */ + Datum + median_numeric_transfn(PG_FUNCTION_ARGS) + { + return common_median_transfn(fcinfo, + NUMERICOID, NUMERICOID); + } + + Datum + median_int8_transfn(PG_FUNCTION_ARGS) + { + return common_median_transfn(fcinfo, + INT8OID, NUMERICOID); + } + + Datum + median_int4_transfn(PG_FUNCTION_ARGS) + { + return common_median_transfn(fcinfo, + INT4OID, NUMERICOID); + } + + Datum + median_int2_transfn(PG_FUNCTION_ARGS) + { + return common_median_transfn(fcinfo, + INT2OID, NUMERICOID); + } + + Datum + median_double_transfn(PG_FUNCTION_ARGS) + { + return common_median_transfn(fcinfo, + FLOAT8OID, FLOAT8OID); + } + + Datum + median_float_transfn(PG_FUNCTION_ARGS) + { + return common_median_transfn(fcinfo, + FLOAT4OID, FLOAT8OID); + } + + /* + * Used for reading values from tuplesort. The value has to be + * double or cast function is defined (and used). + */ + static double + to_double(Datum value, FmgrInfo *cast_func_finfo) + { + /* when valtype is same as target type, returns directly */ + if (cast_func_finfo->fn_oid == InvalidOid) + return DatumGetFloat8(value); + + return DatumGetFloat8(FunctionCall1(cast_func_finfo, value)); + } + + /* + * Used as final function for median when result is double. + */ + Datum + median_finalfn_double(PG_FUNCTION_ARGS) + { + MedianAggState *aggstate; + + Assert(AggCheckCallContext(fcinfo, NULL)); + + aggstate = PG_ARGISNULL(0) ? NULL : (MedianAggState *) PG_GETARG_POINTER(0); + + if (aggstate != NULL) + { + int lidx; + int hidx; + Datum value; + bool isNull; + int i = 1; + double result = 0; + + hidx = aggstate->nelems / 2 + 1; + lidx = (aggstate->nelems + 1) / 2; + + tuplesort_performsort(aggstate->sortstate); + + while (tuplesort_getdatum(aggstate->sortstate, + true, + &value, &isNull)) + { + if (i++ == lidx) + { + result = to_double(value, &aggstate->cast_func_finfo); + + if (lidx != hidx) + { + tuplesort_getdatum(aggstate->sortstate, + true, + &value, &isNull); + result = (result + to_double(value, &aggstate->cast_func_finfo)) / 2.0; + } + break; + } + } + + tuplesort_end(aggstate->sortstate); + + PG_RETURN_FLOAT8(result); + } + else + PG_RETURN_NULL(); + } + + /* + * Used for reading values from tuplesort. The value has to be + * Numeric or cast function is defined (and used). + */ + static Numeric + to_numeric(Datum value, FmgrInfo *cast_func_finfo) + { + /* when valtype is same as target type, returns directly */ + if (cast_func_finfo->fn_oid == InvalidOid) + return DatumGetNumeric(value); + + return DatumGetNumeric(FunctionCall1(cast_func_finfo, value)); + } + + /* + * Used as final function for median when result is numeric. + */ + Datum + median_finalfn_numeric(PG_FUNCTION_ARGS) + { + MedianAggState *aggstate; + + Assert(AggCheckCallContext(fcinfo, NULL)); + + aggstate = PG_ARGISNULL(0) ? NULL : (MedianAggState *) PG_GETARG_POINTER(0); + + if (aggstate != NULL) + { + int lidx; + int hidx; + Datum value; + bool isNull; + int i = 1; + Numeric result = NULL; /* be compiler quiet */ + + hidx = aggstate->nelems / 2 + 1; + lidx = (aggstate->nelems + 1) / 2; + + tuplesort_performsort(aggstate->sortstate); + + while (tuplesort_getdatum(aggstate->sortstate, + true, + &value, &isNull)) + { + if (i++ == lidx) + { + result = to_numeric(value, &aggstate->cast_func_finfo); + + if (lidx != hidx) + { + Numeric stack; + + tuplesort_getdatum(aggstate->sortstate, + true, + &value, &isNull); + stack = to_numeric(value, &aggstate->cast_func_finfo); + stack = DatumGetNumeric(DirectFunctionCall2(numeric_add, + NumericGetDatum(stack), + NumericGetDatum(result))); + result = DatumGetNumeric(DirectFunctionCall2(numeric_div, + NumericGetDatum(stack), + DirectFunctionCall1(float4_numeric, + Float4GetDatum(2.0)))); + } + break; + } + } + + tuplesort_end(aggstate->sortstate); + + PG_RETURN_NUMERIC(result); + } + else + PG_RETURN_NULL(); + } *** ./src/include/catalog/pg_aggregate.h.orig 2010-08-05 20:21:17.000000000 +0200 --- ./src/include/catalog/pg_aggregate.h 2010-09-23 14:32:25.572576527 +0200 *************** *** 226,231 **** --- 226,239 ---- /* text */ DATA(insert ( 3538 string_agg_transfn string_agg_finalfn 0 2281 _null_ )); + /* median */ + DATA(insert ( 3123 median_double_transfn median_finalfn_double 0 2281 _null_ )); + DATA(insert ( 3124 median_float_transfn median_finalfn_double 0 2281 _null_ )); + DATA(insert ( 3125 median_numeric_transfn median_finalfn_numeric 0 2281 _null_ )); + DATA(insert ( 3126 median_int8_transfn median_finalfn_numeric 0 2281 _null_ )); + DATA(insert ( 3127 median_int4_transfn median_finalfn_numeric 0 2281 _null_ )); + DATA(insert ( 3128 median_int2_transfn median_finalfn_numeric 0 2281 _null_ )); + /* * prototypes for functions in pg_aggregate.c */ *** ./src/include/catalog/pg_proc.h.orig 2010-08-13 20:36:25.000000000 +0200 --- ./src/include/catalog/pg_proc.h 2010-09-23 14:36:55.381451421 +0200 *************** *** 4823,4828 **** --- 4823,4857 ---- DATA(insert OID = 3114 ( nth_value PGNSP PGUID 12 1 0 0 f t f t f i 2 0 2283 "2283 23" _null_ _null_ _null_ _null_ window_nth_value _null_ _null_ _null_ )); DESCR("fetch the Nth row value"); + /* median aggregate */ + DATA(insert OID = 3115 ( median_numeric_transfn PGNSP PGUID 12 1 0 0 f f f f f i 2 0 2281 "2281 1700" _null_ _null_ _null_ _null_ median_numeric_transfn _null_ _null_ _null_ )); + DESCR("median transident function for numeric"); + DATA(insert OID = 3116 ( median_int8_transfn PGNSP PGUID 12 1 0 0 f f f f f i 2 0 2281 "2281 20" _null_ _null_ _null_ _null_ median_int8_transfn _null_ _null_ _null_ )); + DESCR("median transident function for bigint"); + DATA(insert OID = 3117 ( median_int4_transfn PGNSP PGUID 12 1 0 0 f f f f f i 2 0 2281 "2281 23" _null_ _null_ _null_ _null_ median_int4_transfn _null_ _null_ _null_ )); + DESCR("median transident function for int"); + DATA(insert OID = 3118 ( median_int2_transfn PGNSP PGUID 12 1 0 0 f f f f f i 2 0 2281 "2281 21" _null_ _null_ _null_ _null_ median_int2_transfn _null_ _null_ _null_ )); + DESCR("median transident function for smallint"); + DATA(insert OID = 3119 ( median_double_transfn PGNSP PGUID 12 1 0 0 f f f f f i 2 0 2281 "2281 701" _null_ _null_ _null_ _null_ median_double_transfn _null_ _null_ _null_ )); + DESCR("median transident function for double precision"); + DATA(insert OID = 3120 ( median_float_transfn PGNSP PGUID 12 1 0 0 f f f f f i 2 0 2281 "2281 700" _null_ _null_ _null_ _null_ median_float_transfn _null_ _null_ _null_ )); + DESCR("median transident function for real"); + DATA(insert OID = 3121 ( median_finalfn_double PGNSP PGUID 12 1 0 0 f f f f f i 1 0 701 "2281" _null_ _null_ _null_ _null_ median_finalfn_double _null_ _null_ _null_ )); + DESCR("median final function with double precision result"); + DATA(insert OID = 3122 ( median_finalfn_numeric PGNSP PGUID 12 1 0 0 f f f f f i 1 0 1700 "2281" _null_ _null_ _null_ _null_ median_finalfn_numeric _null_ _null_ _null_ )); + DESCR("median final function with numeric result"); + DATA(insert OID = 3123 ( median PGNSP PGUID 12 1 0 0 t f f f f i 1 0 701 "701" _null_ _null_ _null_ _null_ aggregate_dummy _null_ _null_ _null_ )); + DESCR("median aggregate"); + DATA(insert OID = 3124 ( median PGNSP PGUID 12 1 0 0 t f f f f i 1 0 701 "700" _null_ _null_ _null_ _null_ aggregate_dummy _null_ _null_ _null_ )); + DESCR("median aggregate"); + DATA(insert OID = 3125 ( median PGNSP PGUID 12 1 0 0 t f f f f i 1 0 1700 "1700" _null_ _null_ _null_ _null_ aggregate_dummy _null_ _null_ _null_ )); + DESCR("median aggregate"); + DATA(insert OID = 3126 ( median PGNSP PGUID 12 1 0 0 t f f f f i 1 0 1700 "20" _null_ _null_ _null_ _null_ aggregate_dummy _null_ _null_ _null_ )); + DESCR("median aggregate"); + DATA(insert OID = 3127 ( median PGNSP PGUID 12 1 0 0 t f f f f i 1 0 1700 "23" _null_ _null_ _null_ _null_ aggregate_dummy _null_ _null_ _null_ )); + DESCR("median aggregate"); + DATA(insert OID = 3128 ( median PGNSP PGUID 12 1 0 0 t f f f f i 1 0 1700 "21" _null_ _null_ _null_ _null_ aggregate_dummy _null_ _null_ _null_ )); + DESCR("median aggregate"); /* * Symbolic values for provolatile column: these indicate whether the result *** ./src/include/utils/builtins.h.orig 2010-08-10 23:51:00.000000000 +0200 --- ./src/include/utils/builtins.h 2010-09-23 14:22:16.929453299 +0200 *************** *** 924,929 **** --- 924,937 ---- extern Datum int8_avg(PG_FUNCTION_ARGS); extern Datum width_bucket_numeric(PG_FUNCTION_ARGS); extern Datum hash_numeric(PG_FUNCTION_ARGS); + extern Datum median_numeric_transfn(PG_FUNCTION_ARGS); + extern Datum median_int8_transfn(PG_FUNCTION_ARGS); + extern Datum median_int4_transfn(PG_FUNCTION_ARGS); + extern Datum median_int2_transfn(PG_FUNCTION_ARGS); + extern Datum median_double_transfn(PG_FUNCTION_ARGS); + extern Datum median_float_transfn(PG_FUNCTION_ARGS); + extern Datum median_finalfn_double(PG_FUNCTION_ARGS); + extern Datum median_finalfn_numeric(PG_FUNCTION_ARGS); /* ri_triggers.c */ extern Datum RI_FKey_check_ins(PG_FUNCTION_ARGS); *** ./src/test/regress/expected/numeric.out.orig 2009-08-10 20:29:27.000000000 +0200 --- ./src/test/regress/expected/numeric.out 2010-09-23 14:44:18.000000000 +0200 *************** *** 1372,1374 **** --- 1372,1402 ---- 12345678901234567890 (1 row) + -- median test + create table median_test (a int, b bigint, c smallint, d numeric, e double precision, f real); + insert into median_test select i,i,i,i,i,i from generate_series(1,10) g(i); + select median(a), + median(b), + median(c), + median(d), + median(e), + median(f) from median_test; + median | median | median | median | median | median + --------------------+--------------------+--------------------+--------------------+--------+-------- + 5.5000000000000000 | 5.5000000000000000 | 5.5000000000000000 | 5.5000000000000000 | 5.5 | 5.5 + (1 row) + + truncate table median_test; + insert into median_test select i,i,i,i,i,i from generate_series(1,11) g(i); + select median(a), + median(b), + median(c), + median(d), + median(e), + median(f) from median_test; + median | median | median | median | median | median + --------+--------+--------+--------+--------+-------- + 6 | 6 | 6 | 6 | 6 | 6 + (1 row) + + drop table median_test; *** ./src/test/regress/sql/numeric.sql.orig 2009-08-10 20:29:27.000000000 +0200 --- ./src/test/regress/sql/numeric.sql 2010-09-23 14:49:45.480455045 +0200 *************** *** 824,826 **** --- 824,845 ---- select 12345678901234567890 / 123; select div(12345678901234567890, 123); select div(12345678901234567890, 123) * 123 + 12345678901234567890 % 123; + + -- median test + create table median_test (a int, b bigint, c smallint, d numeric, e double precision, f real); + insert into median_test select i,i,i,i,i,i from generate_series(1,10) g(i); + select median(a), + median(b), + median(c), + median(d), + median(e), + median(f) from median_test; + truncate table median_test; + insert into median_test select i,i,i,i,i,i from generate_series(1,11) g(i); + select median(a), + median(b), + median(c), + median(d), + median(e), + median(f) from median_test; + drop table median_test;
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers