From 783791442fdb4005cdfb4775f8aad977cbfbfce9 Mon Sep 17 00:00:00 2001
From: Junwang Zhao <zhjwpku@gmail.com>
Date: Fri, 27 Sep 2024 13:05:40 +0000
Subject: [PATCH v7] general purpose array_sort

Sorts anyarray in either ascending or descending order.

Signed-off-by: Junwang Zhao <zhjwpku@gmail.com>
---
 doc/src/sgml/func.sgml                        |  32 +++
 src/backend/utils/adt/array_userfuncs.c       | 205 ++++++++++++++++++
 src/include/catalog/pg_proc.dat               |   6 +
 src/test/regress/expected/arrays.out          | 123 +++++++++++
 .../regress/expected/collate.icu.utf8.out     |  13 ++
 src/test/regress/sql/arrays.sql               |  24 ++
 src/test/regress/sql/collate.icu.utf8.sql     |   4 +
 src/tools/pgindent/typedefs.list              |   1 +
 8 files changed, 408 insertions(+)

diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index ad663c94d7..0414b43523 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -20421,6 +20421,38 @@ SELECT NULLIF(value, '(none)') ...
        </para></entry>
       </row>
 
+      <row>
+       <entry role="func_table_entry"><para role="func_signature">
+        <indexterm>
+         <primary>array_sort</primary>
+        </indexterm>
+        <function>array_sort</function> ( <type>anyarray</type> <optional> COLLATE <replaceable>collation_name</replaceable> </optional> <optional> , <parameter>dir</parameter> </optional>)
+        <returnvalue>anyarray</returnvalue>
+       </para>
+       <para>
+        Sorts the array based on the given parameter.
+       </para>
+       <para>
+        If the <literal>COLLATE</literal> option is specified then sorting is based on <replaceable>collation_name</replaceable>, otherwise
+        using array element type's collation.
+        <parameter>dir</parameter> must be one of the following, and that the starred options are the defaults when null position is omitted.
+<programlisting>
+ asc
+ desc
+ asc nulls first
+ asc nulls last *
+ desc nulls first *
+ desc nulls last
+ nulls first
+ nulls last
+</programlisting>
+       </para>
+       <para>
+        <literal>array_sort(ARRAY[[2,4],[2,1],[6,5]])</literal>
+        <returnvalue>{{2,1},{2,4},{6,5}}</returnvalue>
+       </para></entry>
+      </row>
+
       <row>
        <entry role="func_table_entry"><para role="func_signature">
         <indexterm id="function-array-to-string">
diff --git a/src/backend/utils/adt/array_userfuncs.c b/src/backend/utils/adt/array_userfuncs.c
index 6599be2ec5..b6c948e60f 100644
--- a/src/backend/utils/adt/array_userfuncs.c
+++ b/src/backend/utils/adt/array_userfuncs.c
@@ -12,15 +12,18 @@
  */
 #include "postgres.h"
 
+#include "catalog/namespace.h"
 #include "catalog/pg_type.h"
 #include "common/int.h"
 #include "common/pg_prng.h"
 #include "libpq/pqformat.h"
+#include "miscadmin.h"
 #include "port/pg_bitutils.h"
 #include "utils/array.h"
 #include "utils/builtins.h"
 #include "utils/datum.h"
 #include "utils/lsyscache.h"
+#include "utils/tuplesort.h"
 #include "utils/typcache.h"
 
 /*
@@ -1685,3 +1688,205 @@ array_sample(PG_FUNCTION_ARGS)
 
 	PG_RETURN_ARRAYTYPE_P(result);
 }
+
+
+#define WHITESPACE " \t\n\r\v\f"
+
+typedef enum
+{
+	PARSE_SORT_ORDER_INIT,
+	PARSE_SORT_ORDER_DIRECTION_SET,
+	PARSE_SORT_ORDER_NULLS_OPTION,
+	PARSE_SORT_ORDER_ERROR,
+	PARSE_SORT_ORDER_DONE
+} ParseSortOrderState;
+
+static bool
+parse_sort_order(const char *str, bool *sort_asc, bool *nulls_first)
+{
+	char	   *token;
+	char	   *saveptr;
+	char	   *str_copy = pstrdup(str);
+	bool		nulls_first_set = false;
+	ParseSortOrderState state = PARSE_SORT_ORDER_INIT;
+
+	token = strtok_r(str_copy, WHITESPACE, &saveptr);
+
+	while (token != NULL && state != PARSE_SORT_ORDER_ERROR)
+	{
+		switch (state)
+		{
+			case PARSE_SORT_ORDER_INIT:
+				if (pg_strcasecmp(token, "ASC") == 0)
+				{
+					*sort_asc = true;
+					state = PARSE_SORT_ORDER_DIRECTION_SET;
+				}
+				else if (pg_strcasecmp(token, "DESC") == 0)
+				{
+					*sort_asc = false;
+					state = PARSE_SORT_ORDER_DIRECTION_SET;
+				}
+				else if (pg_strcasecmp(token, "NULLS") == 0)
+					state = PARSE_SORT_ORDER_NULLS_OPTION;
+				else
+					state = PARSE_SORT_ORDER_ERROR;
+				break;
+
+			case PARSE_SORT_ORDER_DIRECTION_SET:
+				if (pg_strcasecmp(token, "NULLS") == 0)
+					state = PARSE_SORT_ORDER_NULLS_OPTION;
+				else
+					state = PARSE_SORT_ORDER_ERROR;
+				break;
+
+			case PARSE_SORT_ORDER_NULLS_OPTION:
+				if (pg_strcasecmp(token, "FIRST") == 0)
+				{
+					*nulls_first = true;
+					nulls_first_set = true;
+					state = PARSE_SORT_ORDER_DONE;
+				}
+				else if (pg_strcasecmp(token, "LAST") == 0)
+				{
+					*nulls_first = false;
+					nulls_first_set = true;
+					state = PARSE_SORT_ORDER_DONE;
+				}
+				else
+					state = PARSE_SORT_ORDER_ERROR;
+				break;
+
+			case PARSE_SORT_ORDER_DONE:
+				/* No more tokens should be processed after first/last */
+				state = PARSE_SORT_ORDER_ERROR;
+				break;
+
+			default:
+				state = PARSE_SORT_ORDER_ERROR;
+				break;
+		}
+
+		token = strtok_r(NULL, WHITESPACE, &saveptr);
+	}
+
+	if (state == PARSE_SORT_ORDER_INIT ||
+		state == PARSE_SORT_ORDER_DIRECTION_SET)
+		state = PARSE_SORT_ORDER_DONE;
+
+	if (state == PARSE_SORT_ORDER_NULLS_OPTION)
+		state = PARSE_SORT_ORDER_ERROR;
+
+	if (!nulls_first_set && state == PARSE_SORT_ORDER_DONE)
+		*nulls_first = !*sort_asc;
+
+	pfree(str_copy);
+	return state == PARSE_SORT_ORDER_DONE;
+}
+
+/*
+ * array_sort
+ *
+ * Sorts the array in either ascending or descending order.
+ * The array must be empty or one-dimensional.
+ */
+Datum
+array_sort(PG_FUNCTION_ARGS)
+{
+	ArrayType  *array = PG_GETARG_ARRAYTYPE_P(0);
+	text	   *dirstr = (fcinfo->nargs > 1) ? PG_GETARG_TEXT_PP(1) : NULL;
+	bool		sort_asc = true;
+	bool		nulls_first = false;
+	Oid			elmtyp;
+	Oid			array_type;
+	Oid			collation = PG_GET_COLLATION();
+	TypeCacheEntry *typentry;
+	Tuplesortstate *tuplesortstate;
+	ArrayIterator array_iterator;
+	Datum		value;
+	bool		isnull;
+	ArrayBuildStateAny *astate = NULL;
+
+	if (ARR_NDIM(array) < 1)
+		PG_RETURN_ARRAYTYPE_P(array);
+
+	if (dirstr != NULL)
+	{
+		if (!parse_sort_order(text_to_cstring(dirstr), &sort_asc, &nulls_first))
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("second parameter must be a valid sort direction")));
+	}
+
+	elmtyp = ARR_ELEMTYPE(array);
+	typentry = (TypeCacheEntry *) fcinfo->flinfo->fn_extra;
+	if (ARR_NDIM(array) == 1)
+	{
+		if (typentry == NULL || typentry->type_id != elmtyp)
+		{
+			typentry = lookup_type_cache(elmtyp, sort_asc ? TYPECACHE_LT_OPR : TYPECACHE_GT_OPR);
+			if ((sort_asc && !OidIsValid(typentry->lt_opr)) ||
+				(!sort_asc && !OidIsValid(typentry->gt_opr)))
+				ereport(ERROR,
+						(errcode(ERRCODE_UNDEFINED_FUNCTION),
+						errmsg("could not identify an ordering operator for type %s",
+								format_type_be(elmtyp))));
+			fcinfo->flinfo->fn_extra = (void *) typentry;
+		}
+	}
+	else
+	{
+		if (typentry == NULL || typentry->typelem != elmtyp)
+		{
+			array_type = get_array_type(elmtyp);
+			if (!OidIsValid(array_type))
+				ereport(ERROR,
+						(errcode(ERRCODE_UNDEFINED_OBJECT),
+						 errmsg("could not find array type for data type %s",
+								format_type_be(elmtyp))));
+			typentry = lookup_type_cache(array_type, sort_asc ? TYPECACHE_LT_OPR : TYPECACHE_GT_OPR);
+			if ((sort_asc && !OidIsValid(typentry->lt_opr)) ||
+				(!sort_asc && !OidIsValid(typentry->gt_opr)))
+				ereport(ERROR,
+						(errcode(ERRCODE_UNDEFINED_FUNCTION),
+						 errmsg("could not identify an ordering operator for type %s",
+								format_type_be(elmtyp))));
+			fcinfo->flinfo->fn_extra = (void *) typentry;
+		}
+	}
+
+	tuplesortstate = tuplesort_begin_datum(typentry->type_id,
+										   sort_asc ? typentry->lt_opr : typentry->gt_opr,
+										   collation,
+										   nulls_first, work_mem, NULL, false);
+
+	array_iterator = array_create_iterator(array, ARR_NDIM(array) - 1, NULL);
+	while (array_iterate(array_iterator, &value, &isnull))
+	{
+		tuplesort_putdatum(tuplesortstate, value, isnull);
+	}
+	array_free_iterator(array_iterator);
+
+	/*
+	 * Do the sort.
+	 */
+	tuplesort_performsort(tuplesortstate);
+
+	while (tuplesort_getdatum(tuplesortstate, true, false, &value, &isnull, NULL))
+	{
+		astate = accumArrayResultAny(astate, value, isnull,
+									 typentry->type_id, CurrentMemoryContext);
+	}
+
+	tuplesort_end(tuplesortstate);
+
+	/* Avoid leaking memory when handed toasted input */
+	PG_FREE_IF_COPY(array, 0);
+	PG_RETURN_DATUM(makeArrayResultAny(astate, CurrentMemoryContext, true));
+}
+
+Datum
+array_sort_order(PG_FUNCTION_ARGS)
+{
+	return array_sort(fcinfo);
+}
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 7c0b74fe05..a92fd3a6c2 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -1741,6 +1741,12 @@
 { oid => '6216', descr => 'take samples from array',
   proname => 'array_sample', provolatile => 'v', prorettype => 'anyarray',
   proargtypes => 'anyarray int4', prosrc => 'array_sample' },
+{ oid => '8810', descr => 'sort array',
+  proname => 'array_sort', provolatile => 'v', prorettype => 'anyarray',
+  proargtypes => 'anyarray', prosrc => 'array_sort'},
+{ oid => '8811', descr => 'sort array',
+  proname => 'array_sort', provolatile => 'v', prorettype => 'anyarray',
+  proargtypes => 'anyarray text', prosrc => 'array_sort_order'},
 { oid => '3816', descr => 'array typanalyze',
   proname => 'array_typanalyze', provolatile => 's', prorettype => 'bool',
   proargtypes => 'internal', prosrc => 'array_typanalyze' },
diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out
index a6d81fd5f9..3c733e5aab 100644
--- a/src/test/regress/expected/arrays.out
+++ b/src/test/regress/expected/arrays.out
@@ -2703,3 +2703,126 @@ SELECT array_sample('{1,2,3,4,5,6}'::int[], -1); -- fail
 ERROR:  sample size must be between 0 and 6
 SELECT array_sample('{1,2,3,4,5,6}'::int[], 7); --fail
 ERROR:  sample size must be between 0 and 6
+-- array_sort
+SELECT array_sort('{}'::int[]);
+ array_sort 
+------------
+ {}
+(1 row)
+
+SELECT array_sort('{1,3,5,2,4,6}'::int[]);
+  array_sort   
+---------------
+ {1,2,3,4,5,6}
+(1 row)
+
+SELECT array_sort('{1,3,5,2,4,6}'::int[], 'desc');
+  array_sort   
+---------------
+ {6,5,4,3,2,1}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::float8[], 'asc');
+        array_sort         
+---------------------------
+ {1.1,2.2,3.3,4.4,5.5,6.6}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::float8[], 'desc');
+        array_sort         
+---------------------------
+ {6.6,5.5,4.4,3.3,2.2,1.1}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]);
+        array_sort         
+---------------------------
+ {1.1,2.2,3.3,4.4,5.5,6.6}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[], 'desc');
+        array_sort         
+---------------------------
+ {6.6,5.5,4.4,3.3,2.2,1.1}
+(1 row)
+
+SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C");
+      array_sort       
+-----------------------
+ {Abc,CCC,bar,bbc,foo}
+(1 row)
+
+SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C", 'asc');
+      array_sort       
+-----------------------
+ {Abc,CCC,bar,bbc,foo}
+(1 row)
+
+SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C", 'desc');
+      array_sort       
+-----------------------
+ {foo,bbc,bar,CCC,Abc}
+(1 row)
+
+-- nulls first/last tests
+SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C");
+         array_sort         
+----------------------------
+ {Abc,CCC,bar,bbc,foo,NULL}
+(1 row)
+
+SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'asc');
+         array_sort         
+----------------------------
+ {Abc,CCC,bar,bbc,foo,NULL}
+(1 row)
+
+SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'desc');
+         array_sort         
+----------------------------
+ {NULL,foo,bbc,bar,CCC,Abc}
+(1 row)
+
+SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'nulls first');
+         array_sort         
+----------------------------
+ {NULL,Abc,CCC,bar,bbc,foo}
+(1 row)
+
+SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'nulls first');
+         array_sort         
+----------------------------
+ {NULL,Abc,CCC,bar,bbc,foo}
+(1 row)
+
+SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'asc nulls first');
+         array_sort         
+----------------------------
+ {NULL,Abc,CCC,bar,bbc,foo}
+(1 row)
+
+SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'asc nulls last');
+         array_sort         
+----------------------------
+ {Abc,CCC,bar,bbc,foo,NULL}
+(1 row)
+
+SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'desc nulls first');
+         array_sort         
+----------------------------
+ {NULL,foo,bbc,bar,CCC,Abc}
+(1 row)
+
+SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'desc nulls last');
+         array_sort         
+----------------------------
+ {foo,bbc,bar,CCC,Abc,NULL}
+(1 row)
+
+-- multidimensional array tests
+SELECT array_sort(ARRAY[[2,4],[2,1],[6,5]]);
+     array_sort      
+---------------------
+ {{2,1},{2,4},{6,5}}
+(1 row)
+
diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out
index faa376e060..aa5fd75e6e 100644
--- a/src/test/regress/expected/collate.icu.utf8.out
+++ b/src/test/regress/expected/collate.icu.utf8.out
@@ -1338,6 +1338,19 @@ SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_inse
  t        | t
 (1 row)
 
+-- tests with array_sort
+SELECT array_sort('{a,B}'::text[] COLLATE case_insensitive);
+ array_sort 
+------------
+ {a,B}
+(1 row)
+
+SELECT array_sort('{a,B}'::text[] COLLATE "C");
+ array_sort 
+------------
+ {B,a}
+(1 row)
+
 -- test language tags
 CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
 SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql
index 47058dfde5..821c4eab0b 100644
--- a/src/test/regress/sql/arrays.sql
+++ b/src/test/regress/sql/arrays.sql
@@ -827,3 +827,27 @@ SELECT array_dims(array_sample('[-1:2][2:3]={{1,2},{3,NULL},{5,6},{7,8}}'::int[]
 SELECT array_dims(array_sample('{{{1,2},{3,NULL}},{{5,6},{7,8}},{{9,10},{11,12}}}'::int[], 2));
 SELECT array_sample('{1,2,3,4,5,6}'::int[], -1); -- fail
 SELECT array_sample('{1,2,3,4,5,6}'::int[], 7); --fail
+
+-- array_sort
+SELECT array_sort('{}'::int[]);
+SELECT array_sort('{1,3,5,2,4,6}'::int[]);
+SELECT array_sort('{1,3,5,2,4,6}'::int[], 'desc');
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::float8[], 'asc');
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::float8[], 'desc');
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]);
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[], 'desc');
+SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C");
+SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C", 'asc');
+SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C", 'desc');
+-- nulls first/last tests
+SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C");
+SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'asc');
+SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'desc');
+SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'nulls first');
+SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'nulls first');
+SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'asc nulls first');
+SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'asc nulls last');
+SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'desc nulls first');
+SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'desc nulls last');
+-- multidimensional array tests
+SELECT array_sort(ARRAY[[2,4],[2,1],[6,5]]);
diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql
index 80f28a97d7..3c739d332b 100644
--- a/src/test/regress/sql/collate.icu.utf8.sql
+++ b/src/test/regress/sql/collate.icu.utf8.sql
@@ -536,6 +536,10 @@ CREATE COLLATION case_insensitive (provider = icu, locale = '@colStrength=second
 SELECT 'abc' <= 'ABC' COLLATE case_sensitive, 'abc' >= 'ABC' COLLATE case_sensitive;
 SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_insensitive;
 
+-- tests with array_sort
+SELECT array_sort('{a,B}'::text[] COLLATE case_insensitive);
+SELECT array_sort('{a,B}'::text[] COLLATE "C");
+
 -- test language tags
 CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
 SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 57de1acff3..f085b3cf87 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -2025,6 +2025,7 @@ ParseLoc
 ParseNamespaceColumn
 ParseNamespaceItem
 ParseParamRefHook
+ParseSortOrderState
 ParseState
 ParsedLex
 ParsedScript
-- 
2.39.5

