Just got a report on IRC of a bug in the array version of percentile_cont; if two of the requested percentiles were between the same pair of input rows, the result could be wrong or an error would be generated.
e.g. select percentile_cont(array[0.4,0.6]) within group (order by gs) from generate_series(1,2) gs; ERROR: missing row in percentile_cont Proposed patch (against current master) attached. -- Andrew (irc:RhodiumToad)
diff --git a/src/backend/utils/adt/orderedsetaggs.c b/src/backend/utils/adt/orderedsetaggs.c index 135a14b..1efbf07 100644 --- a/src/backend/utils/adt/orderedsetaggs.c +++ b/src/backend/utils/adt/orderedsetaggs.c @@ -919,7 +919,7 @@ percentile_cont_multi_final_common(FunctionCallInfo fcinfo, rownum = target_row; } - else + else if (target_row == rownum) { /* * We are already at the target row, so we must previously @@ -928,15 +928,23 @@ percentile_cont_multi_final_common(FunctionCallInfo fcinfo, first_val = second_val; } - /* Fetch second_row if needed */ - if (need_lerp) + /* + * We might be lerping between the same two rows of input as the + * previous value. If so, rownum will already be ahead of + * target_row, and we skip any attempt to read more. + */ + if (target_row == rownum) { - if (!tuplesort_getdatum(osastate->sortstate, true, &second_val, &isnull) || isnull) - elog(ERROR, "missing row in percentile_cont"); - rownum++; + /* Fetch second_row if needed */ + if (need_lerp) + { + if (!tuplesort_getdatum(osastate->sortstate, true, &second_val, &isnull) || isnull) + elog(ERROR, "missing row in percentile_cont"); + rownum++; + } + else + second_val = first_val; } - else - second_val = first_val; /* Compute appropriate result */ if (need_lerp) diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out index 58df854..40f5398 100644 --- a/src/test/regress/expected/aggregates.out +++ b/src/test/regress/expected/aggregates.out @@ -1423,11 +1423,11 @@ from tenk1; {{NULL,999,499},{749,249,NULL}} (1 row) -select percentile_cont(array[0,1,0.25,0.75,0.5,1]) within group (order by x) +select percentile_cont(array[0,1,0.25,0.75,0.5,1,0.3,0.32,0.35,0.38,0.4]) within group (order by x) from generate_series(1,6) x; - percentile_cont ------------------------ - {1,6,2.25,4.75,3.5,6} + percentile_cont +------------------------------------------ + {1,6,2.25,4.75,3.5,6,2.5,2.6,2.75,2.9,3} (1 row) select ten, mode() within group (order by string4) from tenk1 group by ten; diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql index 8096a6f..a84327d 100644 --- a/src/test/regress/sql/aggregates.sql +++ b/src/test/regress/sql/aggregates.sql @@ -533,7 +533,7 @@ select percentile_cont(array[0,0.25,0.5,0.75,1]) within group (order by thousand from tenk1; select percentile_disc(array[[null,1,0.5],[0.75,0.25,null]]) within group (order by thousand) from tenk1; -select percentile_cont(array[0,1,0.25,0.75,0.5,1]) within group (order by x) +select percentile_cont(array[0,1,0.25,0.75,0.5,1,0.3,0.32,0.35,0.38,0.4]) within group (order by x) from generate_series(1,6) x; select ten, mode() within group (order by string4) from tenk1 group by ten;
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers