sorry

little bit fixed patch

Pavel

2010/9/23 Pavel Stehule <pavel.steh...@gmail.com>:
> Hello
>
> I moved a "median" function to core.
>
> + doc part
> + regress test
>
> Regards
>
> Pavel Stehule
>
>
> 2010/9/20 Hitoshi Harada <umi.tan...@gmail.com>:
>> 2010/8/19 Pavel Stehule <pavel.steh...@gmail.com>:
>>> Hello
>>>
>>> I am sending a prototype implementation of functions median and
>>> percentile. This implementation is very simple and I moved it to
>>> contrib for this moment - it is more easy maintainable. Later I'll
>>> move it to core.
>>
>> I've reviewed this patch.
>>
>> * The patch can apply cleanly and make doesn't print any errors nor
>> warnings. But it doesn't touch contrib/Makefile so I had to make by
>> changing dir to contrib/median.
>>
>> * Cosmetic coding style should be more appropriate, including trailing
>> white spaces and indentation positions.
>>
>> * Since these two aggregates use tuplesort inside it, there're
>> possible risk to cause out of memory in normal use case. I don't think
>> this fact is critical, but at least some notation should be referred
>> in docs.
>>
>> * It doesn't contain any document nor regression tests.
>>
>> * They should be callable in window function context; for example
>>
>> contrib_regression=# select median(a) over (order by a) from t1;
>> ERROR:  invalid tuplesort state
>>
>> or at least user-friend message should be printed.
>>
>> * The returned type is controversy. median(int) returns float8 as the
>> patch intended, but avg(int) returns numeric. AFAIK only avg(float8)
>> returns float8.
>>
>> * percentile() is more problematic; first, the second argument for the
>> aggregate takes N of "N%ile" as int, like 50 if you want 50%ile,  but
>> it doesn't care about negative values or more than 100. In addition,
>> the second argument is taken at the first non-NULL value of the first
>> argument, but the second argument is semantically constant. For
>> example, for (1.. 10) value of a in table t1,
>>
>> contrib_regression=# select percentile(a, a * 10 order by a) from t1;
>>  percentile
>> ------------
>>          1
>> (1 row)
>>
>> contrib_regression=# select percentile(a, a * 10 order by a desc) from t1;
>>  percentile
>> ------------
>>         10
>> (1 row)
>>
>> and if the argument comes from the subquery which doesn't contain
>> ORDER BY clause, you cannot predict the result.
>>
>> That's all of my quick review up to now.
>>
>> Regards,
>>
>> --
>> Hitoshi Harada
>>
>
*** ./doc/src/sgml/func.sgml.orig	2010-08-17 06:37:20.000000000 +0200
--- ./doc/src/sgml/func.sgml	2010-09-23 15:03:10.021576906 +0200
***************
*** 10304,10309 ****
--- 10304,10329 ----
  
       <row>
        <entry>
+        <indexterm>
+         <primary>Arithmetic median</primary>
+         <secondary>median</secondary>
+        </indexterm>
+        <function>median(<replaceable class="parameter">expression</replaceable>)</function>
+       </entry>
+       <entry>
+        <type>smallint</type>, <type>int</type>,
+        <type>bigint</type>, <type>real</type>, <type>double
+        precision</type>, or <type>numeric</type>
+       </entry>
+       <entry>
+        <type>double precision</type> for floating-point arguments,
+        otherwise <type>numeric</type>
+       </entry>
+       <entry>arithmetic median</entry>
+      </row>
+ 
+      <row>
+       <entry>
         <function>regr_avgx(<replaceable class="parameter">Y</replaceable>, <replaceable class="parameter">X</replaceable>)</function>
        </entry>
        <entry>
*** ./src/backend/utils/adt/numeric.c.orig	2010-08-04 19:33:09.000000000 +0200
--- ./src/backend/utils/adt/numeric.c	2010-09-23 15:24:04.025453940 +0200
***************
*** 30,39 ****
--- 30,42 ----
  #include "catalog/pg_type.h"
  #include "libpq/pqformat.h"
  #include "miscadmin.h"
+ #include "parser/parse_coerce.h"
+ #include "parser/parse_oper.h"
  #include "utils/array.h"
  #include "utils/builtins.h"
  #include "utils/int8.h"
  #include "utils/numeric.h"
+ #include "utils/tuplesort.h"
  
  /* ----------
   * Uncomment the following to enable compilation of dump_numeric()
***************
*** 144,149 ****
--- 147,161 ----
  	union NumericChoice	choice;	/* choice of format */
  };
  
+ /*
+  * used as type of state variable median's function
+  */
+ typedef struct
+ {
+ 	int	nelems;		/* number of valid entries */
+ 	Tuplesortstate *sortstate;
+ 	FmgrInfo	cast_func_finfo;
+ } MedianAggState;
  
  /*
   * Interpretation of high bits.
***************
*** 6173,6175 ****
--- 6185,6467 ----
  	var->digits = digits;
  	var->ndigits = ndigits;
  }
+ 
+ static MedianAggState *
+ makeMedianAggState(FunctionCallInfo fcinfo, Oid valtype, Oid targettype)
+ {
+ 	MemoryContext oldctx;
+ 	MemoryContext aggcontext;
+ 	MedianAggState *aggstate;
+ 	Oid	sortop,
+ 			castfunc;
+ 	CoercionPathType		pathtype;
+ 
+ 	/*
+ 	 * We cannot to allow a median function under WindowAgg State.
+ 	 * This content needs a repetetive a calling of final function,
+ 	 * what isn't possible now with using a tuplestore. 
+ 	 * tuplesort_performsort can be called only once, and some has
+ 	 * to call a tuplesort_end.
+ 	 */
+ 	if (fcinfo->context && IsA(fcinfo->context, WindowAggState))
+ 	{
+ 		/* cannot be called inside like windowAggregates */
+ 		elog(ERROR, "median_transfn called as windows aggregates");
+ 	}
+ 
+ 	if (!AggCheckCallContext(fcinfo, &aggcontext))
+ 	{
+ 		/* cannot be called directly because of internal-type argument */
+ 		elog(ERROR, "median_transfn called in non-aggregate context");
+ 	}
+ 
+ 	oldctx = MemoryContextSwitchTo(aggcontext);
+ 
+ 	aggstate = (MedianAggState *) palloc0(sizeof(MedianAggState));
+ 
+ 	valtype = get_fn_expr_argtype(fcinfo->flinfo, 1);
+ 	get_sort_group_operators(valtype,
+ 						    true, false, false,
+ 						    &sortop, NULL, NULL);
+ 
+ 	aggstate->sortstate = tuplesort_begin_datum(valtype,
+ 							sortop,
+ 							SORTBY_NULLS_DEFAULT,
+ 							work_mem, true);
+ 
+ 	MemoryContextSwitchTo(oldctx);
+ 
+ 	if (valtype != targettype)
+ 	{
+ 		/* find a cast function */
+ 		pathtype = find_coercion_pathway(targettype, valtype,
+ 									COERCION_EXPLICIT,
+ 									&castfunc);
+ 		if (pathtype == COERCION_PATH_FUNC)
+ 		{
+ 			Assert(OidIsValid(castfunc));
+ 			fmgr_info_cxt(castfunc, &aggstate->cast_func_finfo,
+ 									    aggcontext);
+ 		} 
+ 		else if (pathtype == COERCION_PATH_RELABELTYPE)
+ 		{
+ 			aggstate->cast_func_finfo.fn_oid = InvalidOid;
+ 		}
+ 		else 
+ 			elog(ERROR, "no conversion function from %s %s",
+ 					 format_type_be(valtype),
+ 					 format_type_be(targettype));
+ 	}
+ 
+ 	return aggstate;
+ }
+ 
+ /*
+  *  append a non NULL value to tuplesort
+  */
+ static Datum
+ common_median_transfn(FunctionCallInfo fcinfo, Oid typoid, Oid targetoid)
+ {
+ 	MedianAggState *aggstate;
+ 	
+ 	aggstate = PG_ARGISNULL(0) ? NULL : (MedianAggState *) PG_GETARG_POINTER(0);
+ 
+ 	if (!PG_ARGISNULL(1))
+ 	{
+ 		if (aggstate == NULL)
+ 			aggstate = makeMedianAggState(fcinfo, typoid, targetoid);
+ 
+ 		tuplesort_putdatum(aggstate->sortstate, PG_GETARG_DATUM(1), false);
+ 		aggstate->nelems++;
+ 	}
+ 
+ 	PG_RETURN_POINTER(aggstate);
+ }
+ 
+ /*
+  * just wrappers to be opr sanity checks happy
+  */
+ Datum
+ median_numeric_transfn(PG_FUNCTION_ARGS)
+ {
+ 	return common_median_transfn(fcinfo, 
+ 						NUMERICOID, NUMERICOID);
+ }
+ 
+ Datum
+ median_int8_transfn(PG_FUNCTION_ARGS)
+ {
+ 	return common_median_transfn(fcinfo, 
+ 						INT8OID, NUMERICOID);
+ }
+ 
+ Datum
+ median_int4_transfn(PG_FUNCTION_ARGS)
+ {
+ 	return common_median_transfn(fcinfo, 
+ 						INT4OID, NUMERICOID);
+ }
+ 
+ Datum
+ median_int2_transfn(PG_FUNCTION_ARGS)
+ {
+ 	return common_median_transfn(fcinfo,
+ 						INT2OID, NUMERICOID);
+ }
+ 
+ Datum
+ median_double_transfn(PG_FUNCTION_ARGS)
+ {
+ 	return common_median_transfn(fcinfo,
+ 						FLOAT8OID, FLOAT8OID);
+ }
+ 
+ Datum
+ median_float_transfn(PG_FUNCTION_ARGS)
+ {
+ 	return common_median_transfn(fcinfo,
+ 						FLOAT4OID, FLOAT8OID);
+ }
+ 
+ /*
+  * Used for reading values from tuplesort. The value has to be
+  * double or cast function is defined (and used).
+  */
+ static double 
+ to_double(Datum value, FmgrInfo *cast_func_finfo)
+ {
+ 	/* when valtype is same as target type, returns directly */
+ 	if (cast_func_finfo->fn_oid == InvalidOid)
+ 		return DatumGetFloat8(value);
+ 
+ 	return DatumGetFloat8(FunctionCall1(cast_func_finfo, value));
+ }
+ 
+ /*
+  * Used as final function for median when result is double.
+  */
+ Datum
+ median_finalfn_double(PG_FUNCTION_ARGS)
+ {
+ 	MedianAggState *aggstate;
+ 
+ 	Assert(AggCheckCallContext(fcinfo, NULL));
+ 
+ 	aggstate = PG_ARGISNULL(0) ? NULL : (MedianAggState *) PG_GETARG_POINTER(0);
+ 
+ 	if (aggstate != NULL)
+ 	{
+ 		int	lidx;
+ 		int	hidx;
+ 		Datum	   value;
+ 		bool	isNull;
+ 		int		i = 1;
+ 		double	result = 0;
+ 
+ 		hidx = aggstate->nelems / 2 + 1;
+ 		lidx = (aggstate->nelems + 1) / 2;
+ 
+ 		tuplesort_performsort(aggstate->sortstate);
+ 
+ 		while (tuplesort_getdatum(aggstate->sortstate, 
+ 								true,
+ 								      &value, &isNull))
+ 		{
+ 			if (i++ == lidx)
+ 			{
+ 				result = to_double(value, &aggstate->cast_func_finfo);
+ 
+ 				if (lidx != hidx)
+ 				{
+ 					tuplesort_getdatum(aggstate->sortstate,
+ 										true,
+ 										      &value, &isNull);
+ 					result = (result + to_double(value, &aggstate->cast_func_finfo)) / 2.0;
+ 				}
+ 				break;
+ 			}
+ 		}
+ 
+ 		tuplesort_end(aggstate->sortstate);
+ 
+ 		PG_RETURN_FLOAT8(result);
+ 	}
+ 	else
+ 		PG_RETURN_NULL();
+ }
+ 
+ /*
+  * Used for reading values from tuplesort. The value has to be
+  * Numeric or cast function is defined (and used).
+  */
+ static Numeric
+ to_numeric(Datum value, FmgrInfo *cast_func_finfo)
+ {
+ 	/* when valtype is same as target type, returns directly */
+ 	if (cast_func_finfo->fn_oid == InvalidOid)
+ 		return DatumGetNumeric(value);
+ 
+ 	return DatumGetNumeric(FunctionCall1(cast_func_finfo, value));
+ }
+ 
+ /*
+  * Used as final function for median when result is numeric.
+  */
+ Datum
+ median_finalfn_numeric(PG_FUNCTION_ARGS)
+ {
+ 	MedianAggState *aggstate;
+ 
+ 	Assert(AggCheckCallContext(fcinfo, NULL));
+ 
+ 	aggstate = PG_ARGISNULL(0) ? NULL : (MedianAggState *) PG_GETARG_POINTER(0);
+ 
+ 	if (aggstate != NULL)
+ 	{
+ 		int	lidx;
+ 		int	hidx;
+ 		Datum	   value;
+ 		bool	isNull;
+ 		int		i = 1;
+ 		Numeric result = NULL;		/* be compiler quiet */
+ 
+ 		hidx = aggstate->nelems / 2 + 1;
+ 		lidx = (aggstate->nelems + 1) / 2;
+ 
+ 		tuplesort_performsort(aggstate->sortstate);
+ 		
+ 		while (tuplesort_getdatum(aggstate->sortstate, 
+ 								true,
+ 								      &value, &isNull))
+ 		{
+ 			if (i++ == lidx)
+ 			{
+ 				result = to_numeric(value, &aggstate->cast_func_finfo);
+ 
+ 				if (lidx != hidx)
+ 				{
+ 					Numeric stack;
+ 
+ 					tuplesort_getdatum(aggstate->sortstate,
+ 										true,
+ 										      &value, &isNull);
+ 					stack = to_numeric(value, &aggstate->cast_func_finfo);
+ 					stack = DatumGetNumeric(DirectFunctionCall2(numeric_add,
+ 												NumericGetDatum(stack),
+ 												NumericGetDatum(result)));
+ 					result = DatumGetNumeric(DirectFunctionCall2(numeric_div,
+ 												NumericGetDatum(stack),
+ 												DirectFunctionCall1(float4_numeric,
+ 																    Float4GetDatum(2.0))));
+ 				}
+ 				break;
+ 			}
+ 		}
+ 
+ 		tuplesort_end(aggstate->sortstate);
+ 
+ 		PG_RETURN_NUMERIC(result);
+ 	}
+ 	else
+ 		PG_RETURN_NULL();
+ }
*** ./src/include/catalog/pg_aggregate.h.orig	2010-08-05 20:21:17.000000000 +0200
--- ./src/include/catalog/pg_aggregate.h	2010-09-23 14:32:25.572576527 +0200
***************
*** 226,231 ****
--- 226,239 ----
  /* text */
  DATA(insert ( 3538	string_agg_transfn	string_agg_finalfn		0	2281	_null_ ));
  
+ /* median */
+ DATA(insert ( 3123	median_double_transfn	median_finalfn_double		0	2281	_null_ ));
+ DATA(insert ( 3124	median_float_transfn	median_finalfn_double		0	2281	_null_ ));
+ DATA(insert ( 3125	median_numeric_transfn	median_finalfn_numeric		0	2281	_null_ ));
+ DATA(insert ( 3126	median_int8_transfn	median_finalfn_numeric		0	2281	_null_ ));
+ DATA(insert ( 3127	median_int4_transfn	median_finalfn_numeric		0	2281	_null_ ));
+ DATA(insert ( 3128	median_int2_transfn	median_finalfn_numeric		0	2281	_null_ ));
+ 
  /*
   * prototypes for functions in pg_aggregate.c
   */
*** ./src/include/catalog/pg_proc.h.orig	2010-08-13 20:36:25.000000000 +0200
--- ./src/include/catalog/pg_proc.h	2010-09-23 14:36:55.381451421 +0200
***************
*** 4823,4828 ****
--- 4823,4857 ----
  DATA(insert OID = 3114 (  nth_value		PGNSP PGUID 12 1 0 0 f t f t f i 2 0 2283 "2283 23" _null_ _null_ _null_ _null_ window_nth_value _null_ _null_ _null_ ));
  DESCR("fetch the Nth row value");
  
+ /* median aggregate */
+ DATA(insert OID = 3115 ( median_numeric_transfn			PGNSP PGUID 12 1 0 0 f f f f f i 2 0 2281 "2281 1700" _null_ _null_ _null_ _null_	median_numeric_transfn _null_ _null_ _null_ ));
+ DESCR("median transident function for numeric");
+ DATA(insert OID = 3116 ( median_int8_transfn			PGNSP PGUID 12 1 0 0 f f f f f i 2 0 2281 "2281 20" _null_ _null_ _null_ _null_	median_int8_transfn _null_ _null_ _null_ ));
+ DESCR("median transident function for bigint");
+ DATA(insert OID = 3117 ( median_int4_transfn			PGNSP PGUID 12 1 0 0 f f f f f i 2 0 2281 "2281 23" _null_ _null_ _null_ _null_	median_int4_transfn _null_ _null_ _null_ ));
+ DESCR("median transident function for int");
+ DATA(insert OID = 3118 ( median_int2_transfn			PGNSP PGUID 12 1 0 0 f f f f f i 2 0 2281 "2281 21" _null_ _null_ _null_ _null_	median_int2_transfn _null_ _null_ _null_ ));
+ DESCR("median transident function for smallint");
+ DATA(insert OID = 3119 ( median_double_transfn			PGNSP PGUID 12 1 0 0 f f f f f i 2 0 2281 "2281 701" _null_ _null_ _null_ _null_	median_double_transfn _null_ _null_ _null_ ));
+ DESCR("median transident function for double precision");
+ DATA(insert OID = 3120 ( median_float_transfn			PGNSP PGUID 12 1 0 0 f f f f f i 2 0 2281 "2281 700" _null_ _null_ _null_ _null_	median_float_transfn _null_ _null_ _null_ ));
+ DESCR("median transident function for real");
+ DATA(insert OID = 3121 ( median_finalfn_double			PGNSP PGUID 12 1 0 0 f f f f f i 1 0 701 "2281" _null_ _null_ _null_ _null_	median_finalfn_double _null_ _null_ _null_ ));
+ DESCR("median final function with double precision result");
+ DATA(insert OID = 3122 ( median_finalfn_numeric			PGNSP PGUID 12 1 0 0 f f f f f i 1 0 1700 "2281" _null_ _null_ _null_ _null_	median_finalfn_numeric _null_ _null_ _null_ ));
+ DESCR("median final function with numeric result");
+ DATA(insert OID = 3123 ( median					   PGNSP PGUID 12 1 0 0 t f f f f i 1 0 701 "701" _null_ _null_ _null_ _null_ aggregate_dummy _null_ _null_ _null_ ));
+ DESCR("median aggregate");
+ DATA(insert OID = 3124 ( median					   PGNSP PGUID 12 1 0 0 t f f f f i 1 0 701 "700" _null_ _null_ _null_ _null_ aggregate_dummy _null_ _null_ _null_ ));
+ DESCR("median aggregate");
+ DATA(insert OID = 3125 ( median					   PGNSP PGUID 12 1 0 0 t f f f f i 1 0 1700 "1700" _null_ _null_ _null_ _null_ aggregate_dummy _null_ _null_ _null_ ));
+ DESCR("median aggregate");
+ DATA(insert OID = 3126 ( median					   PGNSP PGUID 12 1 0 0 t f f f f i 1 0 1700 "20" _null_ _null_ _null_ _null_ aggregate_dummy _null_ _null_ _null_ ));
+ DESCR("median aggregate");
+ DATA(insert OID = 3127 ( median					   PGNSP PGUID 12 1 0 0 t f f f f i 1 0 1700 "23" _null_ _null_ _null_ _null_ aggregate_dummy _null_ _null_ _null_ ));
+ DESCR("median aggregate");
+ DATA(insert OID = 3128 ( median					   PGNSP PGUID 12 1 0 0 t f f f f i 1 0 1700 "21" _null_ _null_ _null_ _null_ aggregate_dummy _null_ _null_ _null_ ));
+ DESCR("median aggregate");
  
  /*
   * Symbolic values for provolatile column: these indicate whether the result
*** ./src/include/utils/builtins.h.orig	2010-08-10 23:51:00.000000000 +0200
--- ./src/include/utils/builtins.h	2010-09-23 14:22:16.929453299 +0200
***************
*** 924,929 ****
--- 924,937 ----
  extern Datum int8_avg(PG_FUNCTION_ARGS);
  extern Datum width_bucket_numeric(PG_FUNCTION_ARGS);
  extern Datum hash_numeric(PG_FUNCTION_ARGS);
+ extern Datum median_numeric_transfn(PG_FUNCTION_ARGS);
+ extern Datum median_int8_transfn(PG_FUNCTION_ARGS);
+ extern Datum median_int4_transfn(PG_FUNCTION_ARGS);
+ extern Datum median_int2_transfn(PG_FUNCTION_ARGS);
+ extern Datum median_double_transfn(PG_FUNCTION_ARGS);
+ extern Datum median_float_transfn(PG_FUNCTION_ARGS);
+ extern Datum median_finalfn_double(PG_FUNCTION_ARGS);
+ extern Datum median_finalfn_numeric(PG_FUNCTION_ARGS);
  
  /* ri_triggers.c */
  extern Datum RI_FKey_check_ins(PG_FUNCTION_ARGS);
*** ./src/test/regress/expected/numeric.out.orig	2009-08-10 20:29:27.000000000 +0200
--- ./src/test/regress/expected/numeric.out	2010-09-23 14:44:18.000000000 +0200
***************
*** 1372,1374 ****
--- 1372,1402 ----
   12345678901234567890
  (1 row)
  
+ -- median test
+ create table median_test (a int, b bigint, c smallint, d numeric, e double precision, f real);
+ insert into median_test select i,i,i,i,i,i from generate_series(1,10) g(i);
+ select median(a),
+        median(b),
+        median(c),
+        median(d),
+        median(e),
+        median(f) from median_test;
+        median       |       median       |       median       |       median       | median | median 
+ --------------------+--------------------+--------------------+--------------------+--------+--------
+  5.5000000000000000 | 5.5000000000000000 | 5.5000000000000000 | 5.5000000000000000 |    5.5 |    5.5
+ (1 row)
+ 
+ truncate table median_test;
+ insert into median_test select i,i,i,i,i,i from generate_series(1,11) g(i);
+ select median(a),
+        median(b),
+        median(c),
+        median(d),
+        median(e),
+        median(f) from median_test;
+  median | median | median | median | median | median 
+ --------+--------+--------+--------+--------+--------
+       6 |      6 |      6 |      6 |      6 |      6
+ (1 row)
+ 
+ drop table median_test;
*** ./src/test/regress/sql/numeric.sql.orig	2009-08-10 20:29:27.000000000 +0200
--- ./src/test/regress/sql/numeric.sql	2010-09-23 14:49:45.480455045 +0200
***************
*** 824,826 ****
--- 824,845 ----
  select 12345678901234567890 / 123;
  select div(12345678901234567890, 123);
  select div(12345678901234567890, 123) * 123 + 12345678901234567890 % 123;
+ 
+ -- median test
+ create table median_test (a int, b bigint, c smallint, d numeric, e double precision, f real);
+ insert into median_test select i,i,i,i,i,i from generate_series(1,10) g(i);
+ select median(a),
+        median(b),
+        median(c),
+        median(d),
+        median(e),
+        median(f) from median_test;
+ truncate table median_test;
+ insert into median_test select i,i,i,i,i,i from generate_series(1,11) g(i);
+ select median(a),
+        median(b),
+        median(c),
+        median(d),
+        median(e),
+        median(f) from median_test;
+ drop table median_test;
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to