On Fri, 24 Mar 2023 at 14:48, Egor Rogov <e.ro...@postgrespro.ru> wrote:
>
> Done.

> There is one thing I'm not sure what to do about. This check:
>
>       if (typentry->typtype != TYPTYPE_RANGE)
>           ereport(ERROR,
>                   (errcode(ERRCODE_DATATYPE_MISMATCH),
>                    errmsg("expected array of ranges")));
>
> doesn't work, because the range_get_typcache() call errors out first
> ("type %u is not a range type"). The message doesn't look friendly
> enough for user-faced SQL function. Should we duplicate
> range_get_typcache's logic and replace the error message?

> Okay. I've corrected the examples a bit.

It sounds like you've addressed Tomas's feedback and still have one
open question.

Fwiw I rebased it, it seemed to merge fine automatically.

I've updated the CF entry to Needs Review. But at this late date it
may have to wait until the next release.




-- 
Gregory Stark
As Commitfest Manager
From 87424880f1a970448979681684e6916f33567eeb Mon Sep 17 00:00:00 2001
From: Greg Stark <st...@mit.edu>
Date: Mon, 3 Apr 2023 17:04:11 -0400
Subject: [PATCH] pg_stats and range statistics

---
 doc/src/sgml/func.sgml                        |  36 ++++++
 doc/src/sgml/system-views.sgml                |  40 +++++++
 src/backend/catalog/system_views.sql          |  30 ++++-
 src/backend/utils/adt/rangetypes_typanalyze.c | 107 ++++++++++++++++++
 src/include/catalog/pg_proc.dat               |  10 ++
 src/test/regress/expected/rangetypes.out      |  22 ++++
 src/test/regress/expected/rules.out           |  34 +++++-
 src/test/regress/sql/rangetypes.sql           |   7 ++
 8 files changed, 284 insertions(+), 2 deletions(-)

diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 918a492234..548078a12e 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -19643,6 +19643,24 @@ SELECT NULLIF(value, '(none)') ...
        </para></entry>
       </row>
 
+      <row>
+       <entry role="func_table_entry"><para role="func_signature">
+        <indexterm>
+         <primary>ranges_lower</primary>
+        </indexterm>
+        <function>ranges_lower</function> ( <type>anyarray</type> )
+        <returnvalue>anyarray</returnvalue>
+       </para>
+       <para>
+        Extracts lower bounds of ranges in the array (<literal>NULL</literal> if
+        the range is empty or the lower bound is infinite).
+       </para>
+       <para>
+        <literal>lower(ARRAY[numrange(1.1,2.2),numrange(3.3,4.4)])</literal>
+        <returnvalue>{1.1,3.3}</returnvalue>
+       </para></entry>
+      </row>
+
       <row>
        <entry role="func_table_entry"><para role="func_signature">
         <indexterm>
@@ -19661,6 +19679,24 @@ SELECT NULLIF(value, '(none)') ...
        </para></entry>
       </row>
 
+      <row>
+       <entry role="func_table_entry"><para role="func_signature">
+        <indexterm>
+         <primary>ranges_upper</primary>
+        </indexterm>
+        <function>ranges_upper</function> ( <type>anyarray</type> )
+        <returnvalue>anyarray</returnvalue>
+       </para>
+       <para>
+        Extracts upper bounds of ranges (<literal>NULL</literal> if the
+        range is empty or the upper bound is infinite).
+       </para>
+       <para>
+        <literal>upper(ARRAY[numrange(1.1,2.2),numrange(3.3,4.4)])</literal>
+        <returnvalue>{2.2,4.4}</returnvalue>
+       </para></entry>
+      </row>
+
       <row>
        <entry role="func_table_entry"><para role="func_signature">
         <indexterm>
diff --git a/doc/src/sgml/system-views.sgml b/doc/src/sgml/system-views.sgml
index bb1a418450..d7760838ae 100644
--- a/doc/src/sgml/system-views.sgml
+++ b/doc/src/sgml/system-views.sgml
@@ -3784,6 +3784,46 @@ SELECT * FROM pg_locks pl LEFT JOIN pg_prepared_xacts ppx
        non-null elements.  (Null for scalar types.)
       </para></entry>
      </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>range_length_histogram</structfield> <type>anyarray</type>
+      </para>
+      <para>
+       A histogram of the lengths of non-empty and non-null range values of a
+       range type column. (Null for non-range types.)
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>range_empty_frac</structfield> <type>float4</type>
+      </para>
+      <para>
+       Fraction of column entries whose values are empty ranges.
+       (Null for non-range types.)
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>range_lower_histogram</structfield> <type>anyarray</type>
+      </para>
+      <para>
+       A histogram of lower bounds of non-empty and non-null range values.
+       (Null for non-range types.)
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>range_upper_histogram</structfield> <type>anyarray</type>
+      </para>
+      <para>
+       A histogram of upper bounds of non-empty and non-null range values.
+       (Null for non-range types.)
+      </para></entry>
+     </row>
     </tbody>
    </tgroup>
   </table>
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 574cbc2e44..3fb7ee448f 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -243,7 +243,35 @@ CREATE VIEW pg_stats WITH (security_barrier) AS
             WHEN stakind3 = 5 THEN stanumbers3
             WHEN stakind4 = 5 THEN stanumbers4
             WHEN stakind5 = 5 THEN stanumbers5
-        END AS elem_count_histogram
+        END AS elem_count_histogram,
+        CASE
+            WHEN stakind1 = 6 THEN stanumbers1[1]
+            WHEN stakind2 = 6 THEN stanumbers2[1]
+            WHEN stakind3 = 6 THEN stanumbers3[1]
+            WHEN stakind4 = 6 THEN stanumbers4[1]
+            WHEN stakind5 = 6 THEN stanumbers5[1]
+        END AS range_empty_frac,
+        CASE
+            WHEN stakind1 = 6 THEN stavalues1
+            WHEN stakind2 = 6 THEN stavalues2
+            WHEN stakind3 = 6 THEN stavalues3
+            WHEN stakind4 = 6 THEN stavalues4
+            WHEN stakind5 = 6 THEN stavalues5
+        END AS range_length_histogram,
+        CASE
+            WHEN stakind1 = 7 THEN ranges_lower(stavalues1)
+            WHEN stakind2 = 7 THEN ranges_lower(stavalues2)
+            WHEN stakind3 = 7 THEN ranges_lower(stavalues3)
+            WHEN stakind4 = 7 THEN ranges_lower(stavalues4)
+            WHEN stakind5 = 7 THEN ranges_lower(stavalues5)
+            END AS range_lower_histogram,
+        CASE
+            WHEN stakind1 = 7 THEN ranges_upper(stavalues1)
+            WHEN stakind2 = 7 THEN ranges_upper(stavalues2)
+            WHEN stakind3 = 7 THEN ranges_upper(stavalues3)
+            WHEN stakind4 = 7 THEN ranges_upper(stavalues4)
+            WHEN stakind5 = 7 THEN ranges_upper(stavalues5)
+            END AS range_upper_histogram
     FROM pg_statistic s JOIN pg_class c ON (c.oid = s.starelid)
          JOIN pg_attribute a ON (c.oid = attrelid AND attnum = s.staattnum)
          LEFT JOIN pg_namespace n ON (n.oid = c.relnamespace)
diff --git a/src/backend/utils/adt/rangetypes_typanalyze.c b/src/backend/utils/adt/rangetypes_typanalyze.c
index 86810a1a6e..a288ee3ebc 100644
--- a/src/backend/utils/adt/rangetypes_typanalyze.c
+++ b/src/backend/utils/adt/rangetypes_typanalyze.c
@@ -26,6 +26,8 @@
 
 #include "catalog/pg_operator.h"
 #include "commands/vacuum.h"
+#include "utils/array.h"
+#include "utils/arrayaccess.h"
 #include "utils/float.h"
 #include "utils/fmgrprotos.h"
 #include "utils/lsyscache.h"
@@ -38,6 +40,7 @@ static int	range_bound_qsort_cmp(const void *a1, const void *a2, void *arg);
 static void compute_range_stats(VacAttrStats *stats,
 								AnalyzeAttrFetchFunc fetchfunc, int samplerows,
 								double totalrows);
+static Datum ranges_bounds_common(TypeCacheEntry *typentry, AnyArrayType *array, bool need_lower);
 
 /*
  * range_typanalyze -- typanalyze function for range columns
@@ -427,3 +430,107 @@ compute_range_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
 	 * hashtable should also go away, as it used a child memory context.
 	 */
 }
+
+/*
+ * ranges_bounds_common() -- for ranges_upper_bounds() and ranges_lower_bounds()
+ */
+Datum
+ranges_bounds_common(TypeCacheEntry *typentry, AnyArrayType *array, bool need_lower)
+{
+	int			ndims = AARR_NDIM(array);
+	int		   *dims = AARR_DIMS(array);
+	int		   *lbs = AARR_LBOUND(array);
+	int			i;
+	array_iter	iter;
+	int			nelems;
+	Datum	   *elems;
+	bool	   *nulls;
+	TypeCacheEntry *typentry_element;
+
+	if (typentry->typtype != TYPTYPE_RANGE)
+		ereport(ERROR,
+				(errcode(ERRCODE_DATATYPE_MISMATCH),
+				 errmsg("expected array of ranges")));
+
+	ndims = AARR_NDIM(array);
+	dims = AARR_DIMS(array);
+	nelems = ArrayGetNItems(ndims, dims);
+
+	elems = (Datum *) palloc(nelems * sizeof(Datum));
+	nulls = (bool *) palloc(nelems * sizeof(bool));
+
+	array_iter_setup(&iter, array);
+
+	for (i = 0; i < nelems; i++)
+	{
+		Datum		itemvalue;
+		bool		isnull;
+		RangeBound	lower;
+		RangeBound	upper;
+		RangeBound *bound;
+		bool		empty;
+
+		/* Get source element, checking for NULL */
+		itemvalue = array_iter_next(&iter, &isnull, i,
+									typentry->typlen, typentry->typbyval,
+									typentry->typalign);
+
+		Assert(!isnull);
+
+		range_deserialize(typentry, (RangeType *) itemvalue, &lower, &upper, &empty);
+		bound = need_lower ? &lower : &upper;
+	    /* NULL if there's no finite bound - for consistency with range_lower/range_upper */
+		if (empty || bound->infinite)
+		{
+			nulls[i] = true;
+			elems[i] = (Datum) 0;
+		}
+		else
+		{
+			nulls[i] = false;
+			elems[i] = bound->val;
+		}
+	}
+
+	typentry_element = typentry->rngelemtype;
+
+	PG_RETURN_ARRAYTYPE_P(construct_md_array(elems, nulls,
+						  ndims, dims, lbs,
+						  typentry_element->type_id,
+						  typentry_element->typlen,
+						  typentry_element->typbyval,
+						  typentry_element->typalign));
+}
+
+/*
+ * ranges_lower_bounds() -- return array of lower bounds for ranges
+ */
+Datum
+ranges_lower_bounds(PG_FUNCTION_ARGS)
+{
+	AnyArrayType   *array = PG_GETARG_ANY_ARRAY_P(0);
+	Oid				element_type = AARR_ELEMTYPE(array);
+	TypeCacheEntry *typentry;
+
+	/* Get information about range type; note column might be a domain */
+	typentry = range_get_typcache(fcinfo, getBaseType(element_type));
+
+	return ranges_bounds_common(typentry, array, true);
+}
+
+/*
+ * ranges_upper_bounds() -- return array of upper bounds for ranges
+ */
+Datum
+ranges_upper_bounds(PG_FUNCTION_ARGS)
+{
+	AnyArrayType   *array = PG_GETARG_ANY_ARRAY_P(0);
+	Oid				element_type = AARR_ELEMTYPE(array);
+	TypeCacheEntry *typentry;
+
+	/* Get information about range type; note column might be a domain */
+	typentry = range_get_typcache(fcinfo, getBaseType(element_type));
+
+	return ranges_bounds_common(typentry, array, false);
+}
+
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index f9f2642201..b986610f09 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -12053,4 +12053,14 @@
   proname => 'any_value_transfn', prorettype => 'anyelement',
   proargtypes => 'anyelement anyelement', prosrc => 'any_value_transfn' },
 
+{ oid => '9693', descr => 'lower bounds of ranges',
+  proname => 'ranges_lower', provolatile => 's',
+  prorettype => 'anyarray', proargtypes => 'anyarray',
+  prosrc => 'ranges_lower_bounds' },
+
+{ oid => '9694', descr => 'upper bounds of ranges',
+  proname => 'ranges_upper', provolatile => 's',
+  prorettype => 'anyarray', proargtypes => 'anyarray',
+  prosrc => 'ranges_upper_bounds' },
+
 ]
diff --git a/src/test/regress/expected/rangetypes.out b/src/test/regress/expected/rangetypes.out
index ee02ff0163..6d7461179f 100644
--- a/src/test/regress/expected/rangetypes.out
+++ b/src/test/regress/expected/rangetypes.out
@@ -597,6 +597,22 @@ select range_intersect_agg(nr) from numrange_test where nr @> 4.0;
  [3,5)
 (1 row)
 
+select ranges_lower(ARRAY[1,2,3]); -- should fail
+ERROR:  type 23 is not a range type
+select ranges_lower(array_agg(nr order by nr)), ranges_upper(array_agg(nr order by nr)) from numrange_test;
+        ranges_lower        |        ranges_upper        
+----------------------------+----------------------------
+ {NULL,NULL,NULL,1.1,1.7,3} | {NULL,5,NULL,2.2,1.7,NULL}
+(1 row)
+
+select ranges_lower(a), ranges_upper(a) from (
+  select ARRAY[ array_agg(nr order by nr), array_agg(nr order by nr desc) ] from numrange_test
+) t(a);
+                      ranges_lower                       |                      ranges_upper                       
+---------------------------------------------------------+---------------------------------------------------------
+ {{NULL,NULL,NULL,1.1,1.7,3},{3,1.7,1.1,NULL,NULL,NULL}} | {{NULL,5,NULL,2.2,1.7,NULL},{NULL,1.7,2.2,NULL,5,NULL}}
+(1 row)
+
 analyze numrange_test;
 create table numrange_test2(nr numrange);
 create index numrange_test2_hash_idx on numrange_test2 using hash (nr);
@@ -751,6 +767,12 @@ select * from textrange_test where tr < 'empty';
 ----
 (0 rows)
 
+select ranges_lower(array_agg(tr order by tr)), ranges_upper(array_agg(tr order by tr)) from textrange_test;
+      ranges_lower      |      ranges_upper      
+------------------------+------------------------
+ {NULL,NULL,NULL,a,b,d} | {NULL,q,NULL,NULL,g,d}
+(1 row)
+
 -- test canonical form for int4range
 select int4range(1, 10, '[]');
  int4range 
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index ab1aebfde4..7c783ce47a 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -2455,7 +2455,39 @@ pg_stats| SELECT n.nspname AS schemaname,
             WHEN (s.stakind4 = 5) THEN s.stanumbers4
             WHEN (s.stakind5 = 5) THEN s.stanumbers5
             ELSE NULL::real[]
-        END AS elem_count_histogram
+        END AS elem_count_histogram,
+        CASE
+            WHEN (s.stakind1 = 6) THEN s.stanumbers1[1]
+            WHEN (s.stakind2 = 6) THEN s.stanumbers2[1]
+            WHEN (s.stakind3 = 6) THEN s.stanumbers3[1]
+            WHEN (s.stakind4 = 6) THEN s.stanumbers4[1]
+            WHEN (s.stakind5 = 6) THEN s.stanumbers5[1]
+            ELSE NULL::real
+        END AS range_empty_frac,
+        CASE
+            WHEN (s.stakind1 = 6) THEN s.stavalues1
+            WHEN (s.stakind2 = 6) THEN s.stavalues2
+            WHEN (s.stakind3 = 6) THEN s.stavalues3
+            WHEN (s.stakind4 = 6) THEN s.stavalues4
+            WHEN (s.stakind5 = 6) THEN s.stavalues5
+            ELSE NULL::anyarray
+        END AS range_length_histogram,
+        CASE
+            WHEN (s.stakind1 = 7) THEN ranges_lower(s.stavalues1)
+            WHEN (s.stakind2 = 7) THEN ranges_lower(s.stavalues2)
+            WHEN (s.stakind3 = 7) THEN ranges_lower(s.stavalues3)
+            WHEN (s.stakind4 = 7) THEN ranges_lower(s.stavalues4)
+            WHEN (s.stakind5 = 7) THEN ranges_lower(s.stavalues5)
+            ELSE NULL::anyarray
+        END AS range_lower_histogram,
+        CASE
+            WHEN (s.stakind1 = 7) THEN ranges_upper(s.stavalues1)
+            WHEN (s.stakind2 = 7) THEN ranges_upper(s.stavalues2)
+            WHEN (s.stakind3 = 7) THEN ranges_upper(s.stavalues3)
+            WHEN (s.stakind4 = 7) THEN ranges_upper(s.stavalues4)
+            WHEN (s.stakind5 = 7) THEN ranges_upper(s.stavalues5)
+            ELSE NULL::anyarray
+        END AS range_upper_histogram
    FROM (((pg_statistic s
      JOIN pg_class c ON ((c.oid = s.starelid)))
      JOIN pg_attribute a ON (((c.oid = a.attrelid) AND (a.attnum = s.staattnum))))
diff --git a/src/test/regress/sql/rangetypes.sql b/src/test/regress/sql/rangetypes.sql
index c23be928c3..8948ad3e58 100644
--- a/src/test/regress/sql/rangetypes.sql
+++ b/src/test/regress/sql/rangetypes.sql
@@ -133,6 +133,12 @@ select range_intersect_agg(nr) from numrange_test;
 select range_intersect_agg(nr) from numrange_test where false;
 select range_intersect_agg(nr) from numrange_test where nr @> 4.0;
 
+select ranges_lower(ARRAY[1,2,3]); -- should fail
+select ranges_lower(array_agg(nr order by nr)), ranges_upper(array_agg(nr order by nr)) from numrange_test;
+select ranges_lower(a), ranges_upper(a) from (
+  select ARRAY[ array_agg(nr order by nr), array_agg(nr order by nr desc) ] from numrange_test
+) t(a);
+
 analyze numrange_test;
 
 create table numrange_test2(nr numrange);
@@ -195,6 +201,7 @@ select * from textrange_test where tr = '("b","g")';
 select * from textrange_test where tr = '["b","g")';
 select * from textrange_test where tr < 'empty';
 
+select ranges_lower(array_agg(tr order by tr)), ranges_upper(array_agg(tr order by tr)) from textrange_test;
 
 -- test canonical form for int4range
 select int4range(1, 10, '[]');
-- 
2.40.0

Reply via email to