On Fri, 24 Mar 2023 at 14:48, Egor Rogov <e.ro...@postgrespro.ru> wrote: > > Done.
> There is one thing I'm not sure what to do about. This check: > > if (typentry->typtype != TYPTYPE_RANGE) > ereport(ERROR, > (errcode(ERRCODE_DATATYPE_MISMATCH), > errmsg("expected array of ranges"))); > > doesn't work, because the range_get_typcache() call errors out first > ("type %u is not a range type"). The message doesn't look friendly > enough for user-faced SQL function. Should we duplicate > range_get_typcache's logic and replace the error message? > Okay. I've corrected the examples a bit. It sounds like you've addressed Tomas's feedback and still have one open question. Fwiw I rebased it, it seemed to merge fine automatically. I've updated the CF entry to Needs Review. But at this late date it may have to wait until the next release. -- Gregory Stark As Commitfest Manager
From 87424880f1a970448979681684e6916f33567eeb Mon Sep 17 00:00:00 2001 From: Greg Stark <st...@mit.edu> Date: Mon, 3 Apr 2023 17:04:11 -0400 Subject: [PATCH] pg_stats and range statistics --- doc/src/sgml/func.sgml | 36 ++++++ doc/src/sgml/system-views.sgml | 40 +++++++ src/backend/catalog/system_views.sql | 30 ++++- src/backend/utils/adt/rangetypes_typanalyze.c | 107 ++++++++++++++++++ src/include/catalog/pg_proc.dat | 10 ++ src/test/regress/expected/rangetypes.out | 22 ++++ src/test/regress/expected/rules.out | 34 +++++- src/test/regress/sql/rangetypes.sql | 7 ++ 8 files changed, 284 insertions(+), 2 deletions(-) diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 918a492234..548078a12e 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -19643,6 +19643,24 @@ SELECT NULLIF(value, '(none)') ... </para></entry> </row> + <row> + <entry role="func_table_entry"><para role="func_signature"> + <indexterm> + <primary>ranges_lower</primary> + </indexterm> + <function>ranges_lower</function> ( <type>anyarray</type> ) + <returnvalue>anyarray</returnvalue> + </para> + <para> + Extracts lower bounds of ranges in the array (<literal>NULL</literal> if + the range is empty or the lower bound is infinite). + </para> + <para> + <literal>lower(ARRAY[numrange(1.1,2.2),numrange(3.3,4.4)])</literal> + <returnvalue>{1.1,3.3}</returnvalue> + </para></entry> + </row> + <row> <entry role="func_table_entry"><para role="func_signature"> <indexterm> @@ -19661,6 +19679,24 @@ SELECT NULLIF(value, '(none)') ... </para></entry> </row> + <row> + <entry role="func_table_entry"><para role="func_signature"> + <indexterm> + <primary>ranges_upper</primary> + </indexterm> + <function>ranges_upper</function> ( <type>anyarray</type> ) + <returnvalue>anyarray</returnvalue> + </para> + <para> + Extracts upper bounds of ranges (<literal>NULL</literal> if the + range is empty or the upper bound is infinite). + </para> + <para> + <literal>upper(ARRAY[numrange(1.1,2.2),numrange(3.3,4.4)])</literal> + <returnvalue>{2.2,4.4}</returnvalue> + </para></entry> + </row> + <row> <entry role="func_table_entry"><para role="func_signature"> <indexterm> diff --git a/doc/src/sgml/system-views.sgml b/doc/src/sgml/system-views.sgml index bb1a418450..d7760838ae 100644 --- a/doc/src/sgml/system-views.sgml +++ b/doc/src/sgml/system-views.sgml @@ -3784,6 +3784,46 @@ SELECT * FROM pg_locks pl LEFT JOIN pg_prepared_xacts ppx non-null elements. (Null for scalar types.) </para></entry> </row> + + <row> + <entry role="catalog_table_entry"><para role="column_definition"> + <structfield>range_length_histogram</structfield> <type>anyarray</type> + </para> + <para> + A histogram of the lengths of non-empty and non-null range values of a + range type column. (Null for non-range types.) + </para></entry> + </row> + + <row> + <entry role="catalog_table_entry"><para role="column_definition"> + <structfield>range_empty_frac</structfield> <type>float4</type> + </para> + <para> + Fraction of column entries whose values are empty ranges. + (Null for non-range types.) + </para></entry> + </row> + + <row> + <entry role="catalog_table_entry"><para role="column_definition"> + <structfield>range_lower_histogram</structfield> <type>anyarray</type> + </para> + <para> + A histogram of lower bounds of non-empty and non-null range values. + (Null for non-range types.) + </para></entry> + </row> + + <row> + <entry role="catalog_table_entry"><para role="column_definition"> + <structfield>range_upper_histogram</structfield> <type>anyarray</type> + </para> + <para> + A histogram of upper bounds of non-empty and non-null range values. + (Null for non-range types.) + </para></entry> + </row> </tbody> </tgroup> </table> diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index 574cbc2e44..3fb7ee448f 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -243,7 +243,35 @@ CREATE VIEW pg_stats WITH (security_barrier) AS WHEN stakind3 = 5 THEN stanumbers3 WHEN stakind4 = 5 THEN stanumbers4 WHEN stakind5 = 5 THEN stanumbers5 - END AS elem_count_histogram + END AS elem_count_histogram, + CASE + WHEN stakind1 = 6 THEN stanumbers1[1] + WHEN stakind2 = 6 THEN stanumbers2[1] + WHEN stakind3 = 6 THEN stanumbers3[1] + WHEN stakind4 = 6 THEN stanumbers4[1] + WHEN stakind5 = 6 THEN stanumbers5[1] + END AS range_empty_frac, + CASE + WHEN stakind1 = 6 THEN stavalues1 + WHEN stakind2 = 6 THEN stavalues2 + WHEN stakind3 = 6 THEN stavalues3 + WHEN stakind4 = 6 THEN stavalues4 + WHEN stakind5 = 6 THEN stavalues5 + END AS range_length_histogram, + CASE + WHEN stakind1 = 7 THEN ranges_lower(stavalues1) + WHEN stakind2 = 7 THEN ranges_lower(stavalues2) + WHEN stakind3 = 7 THEN ranges_lower(stavalues3) + WHEN stakind4 = 7 THEN ranges_lower(stavalues4) + WHEN stakind5 = 7 THEN ranges_lower(stavalues5) + END AS range_lower_histogram, + CASE + WHEN stakind1 = 7 THEN ranges_upper(stavalues1) + WHEN stakind2 = 7 THEN ranges_upper(stavalues2) + WHEN stakind3 = 7 THEN ranges_upper(stavalues3) + WHEN stakind4 = 7 THEN ranges_upper(stavalues4) + WHEN stakind5 = 7 THEN ranges_upper(stavalues5) + END AS range_upper_histogram FROM pg_statistic s JOIN pg_class c ON (c.oid = s.starelid) JOIN pg_attribute a ON (c.oid = attrelid AND attnum = s.staattnum) LEFT JOIN pg_namespace n ON (n.oid = c.relnamespace) diff --git a/src/backend/utils/adt/rangetypes_typanalyze.c b/src/backend/utils/adt/rangetypes_typanalyze.c index 86810a1a6e..a288ee3ebc 100644 --- a/src/backend/utils/adt/rangetypes_typanalyze.c +++ b/src/backend/utils/adt/rangetypes_typanalyze.c @@ -26,6 +26,8 @@ #include "catalog/pg_operator.h" #include "commands/vacuum.h" +#include "utils/array.h" +#include "utils/arrayaccess.h" #include "utils/float.h" #include "utils/fmgrprotos.h" #include "utils/lsyscache.h" @@ -38,6 +40,7 @@ static int range_bound_qsort_cmp(const void *a1, const void *a2, void *arg); static void compute_range_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, int samplerows, double totalrows); +static Datum ranges_bounds_common(TypeCacheEntry *typentry, AnyArrayType *array, bool need_lower); /* * range_typanalyze -- typanalyze function for range columns @@ -427,3 +430,107 @@ compute_range_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, * hashtable should also go away, as it used a child memory context. */ } + +/* + * ranges_bounds_common() -- for ranges_upper_bounds() and ranges_lower_bounds() + */ +Datum +ranges_bounds_common(TypeCacheEntry *typentry, AnyArrayType *array, bool need_lower) +{ + int ndims = AARR_NDIM(array); + int *dims = AARR_DIMS(array); + int *lbs = AARR_LBOUND(array); + int i; + array_iter iter; + int nelems; + Datum *elems; + bool *nulls; + TypeCacheEntry *typentry_element; + + if (typentry->typtype != TYPTYPE_RANGE) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("expected array of ranges"))); + + ndims = AARR_NDIM(array); + dims = AARR_DIMS(array); + nelems = ArrayGetNItems(ndims, dims); + + elems = (Datum *) palloc(nelems * sizeof(Datum)); + nulls = (bool *) palloc(nelems * sizeof(bool)); + + array_iter_setup(&iter, array); + + for (i = 0; i < nelems; i++) + { + Datum itemvalue; + bool isnull; + RangeBound lower; + RangeBound upper; + RangeBound *bound; + bool empty; + + /* Get source element, checking for NULL */ + itemvalue = array_iter_next(&iter, &isnull, i, + typentry->typlen, typentry->typbyval, + typentry->typalign); + + Assert(!isnull); + + range_deserialize(typentry, (RangeType *) itemvalue, &lower, &upper, &empty); + bound = need_lower ? &lower : &upper; + /* NULL if there's no finite bound - for consistency with range_lower/range_upper */ + if (empty || bound->infinite) + { + nulls[i] = true; + elems[i] = (Datum) 0; + } + else + { + nulls[i] = false; + elems[i] = bound->val; + } + } + + typentry_element = typentry->rngelemtype; + + PG_RETURN_ARRAYTYPE_P(construct_md_array(elems, nulls, + ndims, dims, lbs, + typentry_element->type_id, + typentry_element->typlen, + typentry_element->typbyval, + typentry_element->typalign)); +} + +/* + * ranges_lower_bounds() -- return array of lower bounds for ranges + */ +Datum +ranges_lower_bounds(PG_FUNCTION_ARGS) +{ + AnyArrayType *array = PG_GETARG_ANY_ARRAY_P(0); + Oid element_type = AARR_ELEMTYPE(array); + TypeCacheEntry *typentry; + + /* Get information about range type; note column might be a domain */ + typentry = range_get_typcache(fcinfo, getBaseType(element_type)); + + return ranges_bounds_common(typentry, array, true); +} + +/* + * ranges_upper_bounds() -- return array of upper bounds for ranges + */ +Datum +ranges_upper_bounds(PG_FUNCTION_ARGS) +{ + AnyArrayType *array = PG_GETARG_ANY_ARRAY_P(0); + Oid element_type = AARR_ELEMTYPE(array); + TypeCacheEntry *typentry; + + /* Get information about range type; note column might be a domain */ + typentry = range_get_typcache(fcinfo, getBaseType(element_type)); + + return ranges_bounds_common(typentry, array, false); +} + diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index f9f2642201..b986610f09 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -12053,4 +12053,14 @@ proname => 'any_value_transfn', prorettype => 'anyelement', proargtypes => 'anyelement anyelement', prosrc => 'any_value_transfn' }, +{ oid => '9693', descr => 'lower bounds of ranges', + proname => 'ranges_lower', provolatile => 's', + prorettype => 'anyarray', proargtypes => 'anyarray', + prosrc => 'ranges_lower_bounds' }, + +{ oid => '9694', descr => 'upper bounds of ranges', + proname => 'ranges_upper', provolatile => 's', + prorettype => 'anyarray', proargtypes => 'anyarray', + prosrc => 'ranges_upper_bounds' }, + ] diff --git a/src/test/regress/expected/rangetypes.out b/src/test/regress/expected/rangetypes.out index ee02ff0163..6d7461179f 100644 --- a/src/test/regress/expected/rangetypes.out +++ b/src/test/regress/expected/rangetypes.out @@ -597,6 +597,22 @@ select range_intersect_agg(nr) from numrange_test where nr @> 4.0; [3,5) (1 row) +select ranges_lower(ARRAY[1,2,3]); -- should fail +ERROR: type 23 is not a range type +select ranges_lower(array_agg(nr order by nr)), ranges_upper(array_agg(nr order by nr)) from numrange_test; + ranges_lower | ranges_upper +----------------------------+---------------------------- + {NULL,NULL,NULL,1.1,1.7,3} | {NULL,5,NULL,2.2,1.7,NULL} +(1 row) + +select ranges_lower(a), ranges_upper(a) from ( + select ARRAY[ array_agg(nr order by nr), array_agg(nr order by nr desc) ] from numrange_test +) t(a); + ranges_lower | ranges_upper +---------------------------------------------------------+--------------------------------------------------------- + {{NULL,NULL,NULL,1.1,1.7,3},{3,1.7,1.1,NULL,NULL,NULL}} | {{NULL,5,NULL,2.2,1.7,NULL},{NULL,1.7,2.2,NULL,5,NULL}} +(1 row) + analyze numrange_test; create table numrange_test2(nr numrange); create index numrange_test2_hash_idx on numrange_test2 using hash (nr); @@ -751,6 +767,12 @@ select * from textrange_test where tr < 'empty'; ---- (0 rows) +select ranges_lower(array_agg(tr order by tr)), ranges_upper(array_agg(tr order by tr)) from textrange_test; + ranges_lower | ranges_upper +------------------------+------------------------ + {NULL,NULL,NULL,a,b,d} | {NULL,q,NULL,NULL,g,d} +(1 row) + -- test canonical form for int4range select int4range(1, 10, '[]'); int4range diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index ab1aebfde4..7c783ce47a 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -2455,7 +2455,39 @@ pg_stats| SELECT n.nspname AS schemaname, WHEN (s.stakind4 = 5) THEN s.stanumbers4 WHEN (s.stakind5 = 5) THEN s.stanumbers5 ELSE NULL::real[] - END AS elem_count_histogram + END AS elem_count_histogram, + CASE + WHEN (s.stakind1 = 6) THEN s.stanumbers1[1] + WHEN (s.stakind2 = 6) THEN s.stanumbers2[1] + WHEN (s.stakind3 = 6) THEN s.stanumbers3[1] + WHEN (s.stakind4 = 6) THEN s.stanumbers4[1] + WHEN (s.stakind5 = 6) THEN s.stanumbers5[1] + ELSE NULL::real + END AS range_empty_frac, + CASE + WHEN (s.stakind1 = 6) THEN s.stavalues1 + WHEN (s.stakind2 = 6) THEN s.stavalues2 + WHEN (s.stakind3 = 6) THEN s.stavalues3 + WHEN (s.stakind4 = 6) THEN s.stavalues4 + WHEN (s.stakind5 = 6) THEN s.stavalues5 + ELSE NULL::anyarray + END AS range_length_histogram, + CASE + WHEN (s.stakind1 = 7) THEN ranges_lower(s.stavalues1) + WHEN (s.stakind2 = 7) THEN ranges_lower(s.stavalues2) + WHEN (s.stakind3 = 7) THEN ranges_lower(s.stavalues3) + WHEN (s.stakind4 = 7) THEN ranges_lower(s.stavalues4) + WHEN (s.stakind5 = 7) THEN ranges_lower(s.stavalues5) + ELSE NULL::anyarray + END AS range_lower_histogram, + CASE + WHEN (s.stakind1 = 7) THEN ranges_upper(s.stavalues1) + WHEN (s.stakind2 = 7) THEN ranges_upper(s.stavalues2) + WHEN (s.stakind3 = 7) THEN ranges_upper(s.stavalues3) + WHEN (s.stakind4 = 7) THEN ranges_upper(s.stavalues4) + WHEN (s.stakind5 = 7) THEN ranges_upper(s.stavalues5) + ELSE NULL::anyarray + END AS range_upper_histogram FROM (((pg_statistic s JOIN pg_class c ON ((c.oid = s.starelid))) JOIN pg_attribute a ON (((c.oid = a.attrelid) AND (a.attnum = s.staattnum)))) diff --git a/src/test/regress/sql/rangetypes.sql b/src/test/regress/sql/rangetypes.sql index c23be928c3..8948ad3e58 100644 --- a/src/test/regress/sql/rangetypes.sql +++ b/src/test/regress/sql/rangetypes.sql @@ -133,6 +133,12 @@ select range_intersect_agg(nr) from numrange_test; select range_intersect_agg(nr) from numrange_test where false; select range_intersect_agg(nr) from numrange_test where nr @> 4.0; +select ranges_lower(ARRAY[1,2,3]); -- should fail +select ranges_lower(array_agg(nr order by nr)), ranges_upper(array_agg(nr order by nr)) from numrange_test; +select ranges_lower(a), ranges_upper(a) from ( + select ARRAY[ array_agg(nr order by nr), array_agg(nr order by nr desc) ] from numrange_test +) t(a); + analyze numrange_test; create table numrange_test2(nr numrange); @@ -195,6 +201,7 @@ select * from textrange_test where tr = '("b","g")'; select * from textrange_test where tr = '["b","g")'; select * from textrange_test where tr < 'empty'; +select ranges_lower(array_agg(tr order by tr)), ranges_upper(array_agg(tr order by tr)) from textrange_test; -- test canonical form for int4range select int4range(1, 10, '[]'); -- 2.40.0