Re: A qsort template

Thomas Munro Sat, 02 Apr 2022 22:47:24 -0700

On Sun, Apr 3, 2022 at 11:11 AM Andres Freund <and...@anarazel.de> wrote:
> On 2022-04-03 09:45:13 +1200, Thomas Munro wrote:
> > I think we just need to decide up front if we're in a situation that
> > can't provide datum1/isnull1 (in this case because it's an expression
> > index), and skip the optimised paths.  Here's an experimental patch...
> > still looking into whether there are more cases like this...


I didn't find anything else.

Maybe it'd be better if we explicitly declared whether datum1 is used
in each tuplesort mode's 'begin' function, right next to the code that
installs the set of routines that are in control of that?  Trying that
in this version.  Is it clearer what's going on like this?

> That's a lot of redundant checks. How about putting all the checks for
> optimized paths into one if (state->sortKeys && !state->disabl1e_datum1)?

OK, sure.

> I'm a bit worried that none of the !ubsan tests failed on this...

In accordance with whoever-it-was-that-said-that's law about things
that aren't tested, this are turned out to be broken already[1].  Once
we fix that we should have a new test in the three that might also
eventually have failed under this UB, given enough chaos.

[1] 
https://www.postgresql.org/message-id/CA%2BhUKG%2BbA%2BbmwD36_oDxAoLrCwZjVtST2fqe%3Db4%3DqZcmU7u89A%40mail.gmail.com

From b0a79f91edc6ce305f77373b92f31100fcad07d5 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.mu...@gmail.com>
Date: Sun, 3 Apr 2022 09:41:04 +1200
Subject: [PATCH v2] Fix tuplesort optimizations for expression-based CLUSTER.

When redirecting sorts to specialized variants, commit 69749243 failed
to handle the case where CLUSTER-sort decides not to initialize datum1
and isnull1.  Fix by hoisting that decision up a level and advertising
whether datum1 can be used, in the Tuplesortstate object.

Per reports from UBsan and Valgrind while running the cluster.sql test.

Reviewed-by: Andres Freund <and...@anarazel.de>
Discussion: https://postgr.es/m/CAFBsxsF1TeK5Fic0M%2BTSJXzbKsY6aBqJGNj6ptURuB09ZF6k_w%40mail.gmail.com
---
 src/backend/utils/sort/tuplesort.c | 73 ++++++++++++++++++++++--------
 1 file changed, 53 insertions(+), 20 deletions(-)

diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c
index 361527098f..58441ed638 100644
--- a/src/backend/utils/sort/tuplesort.c
+++ b/src/backend/utils/sort/tuplesort.c
@@ -306,6 +306,12 @@ struct Tuplesortstate
 	void		(*readtup) (Tuplesortstate *state, SortTuple *stup,
 							LogicalTape *tape, unsigned int len);
 
+	/*
+	 * Whether SortTuple's datum1 and isnull1 members are maintained by the
+	 * above routines.  If not, some sort specializations are disabled.
+	 */
+	bool		haveDatum1;
+
 	/*
 	 * This array holds the tuples now in sort memory.  If we are in state
 	 * INITIAL, the tuples are in no particular order; if we are in state
@@ -1016,6 +1022,7 @@ tuplesort_begin_heap(TupleDesc tupDesc,
 	state->copytup = copytup_heap;
 	state->writetup = writetup_heap;
 	state->readtup = readtup_heap;
+	state->haveDatum1 = true;
 
 	state->tupDesc = tupDesc;	/* assume we need not copy tupDesc */
 	state->abbrevNext = 10;
@@ -1095,6 +1102,15 @@ tuplesort_begin_cluster(TupleDesc tupDesc,
 
 	state->indexInfo = BuildIndexInfo(indexRel);
 
+	/*
+	 * If we don't have a simple leading attribute, we can't easily initialize
+	 * datum1, so disable optimizations based on datum1.
+	 */
+	if (state->indexInfo->ii_IndexAttrNumbers[0] == 0)
+		state->haveDatum1 = false;
+	else
+		state->haveDatum1 = true;
+
 	state->tupDesc = tupDesc;	/* assume we need not copy tupDesc */
 
 	indexScanKey = _bt_mkscankey(indexRel, NULL);
@@ -1188,6 +1204,7 @@ tuplesort_begin_index_btree(Relation heapRel,
 	state->writetup = writetup_index;
 	state->readtup = readtup_index;
 	state->abbrevNext = 10;
+	state->haveDatum1 = true;
 
 	state->heapRel = heapRel;
 	state->indexRel = indexRel;
@@ -1262,6 +1279,7 @@ tuplesort_begin_index_hash(Relation heapRel,
 	state->copytup = copytup_index;
 	state->writetup = writetup_index;
 	state->readtup = readtup_index;
+	state->haveDatum1 = true;
 
 	state->heapRel = heapRel;
 	state->indexRel = indexRel;
@@ -1302,6 +1320,7 @@ tuplesort_begin_index_gist(Relation heapRel,
 	state->copytup = copytup_index;
 	state->writetup = writetup_index;
 	state->readtup = readtup_index;
+	state->haveDatum1 = true;
 
 	state->heapRel = heapRel;
 	state->indexRel = indexRel;
@@ -1366,6 +1385,7 @@ tuplesort_begin_datum(Oid datumType, Oid sortOperator, Oid sortCollation,
 	state->writetup = writetup_datum;
 	state->readtup = readtup_datum;
 	state->abbrevNext = 10;
+	state->haveDatum1 = true;
 
 	state->datumType = datumType;
 
@@ -3593,27 +3613,40 @@ tuplesort_sort_memtuples(Tuplesortstate *state)
 
 	if (state->memtupcount > 1)
 	{
-		/* Do we have a specialization for the leading column's comparator? */
-		if (state->sortKeys &&
-			state->sortKeys[0].comparator == ssup_datum_unsigned_cmp)
-		{
-			elog(DEBUG1, "qsort_tuple_unsigned");
-			qsort_tuple_unsigned(state->memtuples, state->memtupcount, state);
-		}
-		else if (state->sortKeys &&
-				 state->sortKeys[0].comparator == ssup_datum_signed_cmp)
-		{
-			elog(DEBUG1, "qsort_tuple_signed");
-			qsort_tuple_signed(state->memtuples, state->memtupcount, state);
-		}
-		else if (state->sortKeys &&
-				 state->sortKeys[0].comparator == ssup_datum_int32_cmp)
+		/*
+		 * Do we have the leading column's value or abbreviation in datum1,
+		 * and is there a specialization for its comparator?
+		 */
+		if (state->haveDatum1 && state->sortKeys)
 		{
-			elog(DEBUG1, "qsort_tuple_int32");
-			qsort_tuple_int32(state->memtuples, state->memtupcount, state);
+			if (state->sortKeys[0].comparator == ssup_datum_unsigned_cmp)
+			{
+				elog(DEBUG1, "qsort_tuple_unsigned");
+				qsort_tuple_unsigned(state->memtuples,
+									 state->memtupcount,
+									 state);
+				return;
+			}
+			else if (state->sortKeys[0].comparator == ssup_datum_signed_cmp)
+			{
+				elog(DEBUG1, "qsort_tuple_signed");
+				qsort_tuple_signed(state->memtuples,
+								   state->memtupcount,
+								   state);
+				return;
+			}
+			else if (state->sortKeys[0].comparator == ssup_datum_int32_cmp)
+			{
+				elog(DEBUG1, "qsort_tuple_int32");
+				qsort_tuple_int32(state->memtuples,
+								  state->memtupcount,
+								  state);
+				return;
+			}
 		}
+
 		/* Can we use the single-key sort function? */
-		else if (state->onlyKey != NULL)
+		if (state->onlyKey != NULL)
 		{
 			elog(DEBUG1, "qsort_ssup");
 			qsort_ssup(state->memtuples, state->memtupcount,
@@ -4134,7 +4167,7 @@ copytup_cluster(Tuplesortstate *state, SortTuple *stup, void *tup)
 	 * set up first-column key value, and potentially abbreviate, if it's a
 	 * simple column
 	 */
-	if (state->indexInfo->ii_IndexAttrNumbers[0] == 0)
+	if (!state->haveDatum1)
 		return;
 
 	original = heap_getattr(tuple,
@@ -4229,7 +4262,7 @@ readtup_cluster(Tuplesortstate *state, SortTuple *stup,
 		LogicalTapeReadExact(tape, &tuplen, sizeof(tuplen));
 	stup->tuple = (void *) tuple;
 	/* set up first-column key value, if it's a simple column */
-	if (state->indexInfo->ii_IndexAttrNumbers[0] != 0)
+	if (state->haveDatum1)
 		stup->datum1 = heap_getattr(tuple,
 									state->indexInfo->ii_IndexAttrNumbers[0],
 									state->tupDesc,
-- 
2.35.1

Re: A qsort template

Reply via email to