Le jeudi 15 juillet 2021, 16:19:23 CEST David Rowley a écrit :> 
> Ronan's latest results plus John's make me think there's no need to
> separate out the node function as I did in v8.  However, I do think v6
> could learn a little from v8. I think I'd rather see the sort method
> determined in ExecInitSort() rather than ExecSort(). I think
> minimising those few extra instructions in ExecSort() might help the
> L1 instruction cache.
> 

I'm not sure I understand what you expect from moving that to ExecInitSort ? 
Maybe we should also implement the tuplesort_state initialization in 
ExecInitSort ? (not the actual feeding and sorting of course).

Please find attached a v9 just moving the flag setting to ExecInitSort, and my 
apologies if I misunderstood your point.

-- 
Ronan Dunklau
commit 2bf8db23c8ddb53f7dbcba33c60350bda9d703ea
Author: Ronan Dunklau <ronan.dunk...@aiven.io>
Date:   Tue Jul 6 16:34:31 2021 +0200

    Use optimized single-datum tuplesort in ExecSort
    
    Previously, with optimization was only done in ExecAgg.
    Doing this optimization on regular sort nodes provides for a nice
    performance improvement, when the input / output tuple has only one
    attribute.

diff --git a/src/backend/executor/nodeSort.c b/src/backend/executor/nodeSort.c
index b99027e0d7..861e4c6e9e 100644
--- a/src/backend/executor/nodeSort.c
+++ b/src/backend/executor/nodeSort.c
@@ -29,6 +29,10 @@
  *		which saves the results in a temporary file or memory. After the
  *		initial call, returns a tuple from the file with each call.
  *
+ *		The tuplesort can either occur on the whole tuple (this is the nominal
+ *		case) or, when the input / output tuple consists of only one attribute,
+ *		we switch to the tuplesort_*_datum API, optimized for that specific case.
+ *
  *		Conditions:
  *		  -- none.
  *
@@ -86,31 +90,63 @@ ExecSort(PlanState *pstate)
 		outerNode = outerPlanState(node);
 		tupDesc = ExecGetResultType(outerNode);
 
-		tuplesortstate = tuplesort_begin_heap(tupDesc,
-											  plannode->numCols,
-											  plannode->sortColIdx,
-											  plannode->sortOperators,
-											  plannode->collations,
-											  plannode->nullsFirst,
-											  work_mem,
-											  NULL,
-											  node->randomAccess);
+		/*
+		 * Switch to the tuplesort_*_datum interface when we have only one
+		 * key, as it is much more efficient especially when the type is
+		 * pass-by-value.
+		 */
+		if (node->datumSort)
+		{
+			tuplesortstate = tuplesort_begin_datum(TupleDescAttr(tupDesc, 0)->atttypid,
+												   plannode->sortOperators[0],
+												   plannode->collations[0],
+												   plannode->nullsFirst[0],
+												   work_mem,
+												   NULL,
+												   node->randomAccess);
+		}
+		else
+			tuplesortstate = tuplesort_begin_heap(tupDesc,
+												  plannode->numCols,
+												  plannode->sortColIdx,
+												  plannode->sortOperators,
+												  plannode->collations,
+												  plannode->nullsFirst,
+												  work_mem,
+												  NULL,
+												  node->randomAccess);
 		if (node->bounded)
 			tuplesort_set_bound(tuplesortstate, node->bound);
 		node->tuplesortstate = (void *) tuplesortstate;
 
 		/*
-		 * Scan the subplan and feed all the tuples to tuplesort.
+		 * Scan the subplan and feed all the tuples to tuplesort, using either
+		 * the _putdatum or _puttupleslot API as appropriate.
 		 */
-
-		for (;;)
+		if (node->datumSort)
 		{
-			slot = ExecProcNode(outerNode);
-
-			if (TupIsNull(slot))
-				break;
-
-			tuplesort_puttupleslot(tuplesortstate, slot);
+			for (;;)
+			{
+				slot = ExecProcNode(outerNode);
+
+				if (TupIsNull(slot))
+					break;
+				slot_getsomeattrs(slot, 1);
+				tuplesort_putdatum(tuplesortstate,
+								   slot->tts_values[0],
+								   slot->tts_isnull[0]);
+			}
+		}
+		else
+		{
+			for (;;)
+			{
+				slot = ExecProcNode(outerNode);
+
+				if (TupIsNull(slot))
+					break;
+				tuplesort_puttupleslot(tuplesortstate, slot);
+			}
 		}
 
 		/*
@@ -150,9 +186,18 @@ ExecSort(PlanState *pstate)
 	 * next fetch from the tuplesort.
 	 */
 	slot = node->ss.ps.ps_ResultTupleSlot;
-	(void) tuplesort_gettupleslot(tuplesortstate,
-								  ScanDirectionIsForward(dir),
-								  false, slot, NULL);
+	if (node->datumSort)
+	{
+		ExecClearTuple(slot);
+		if (tuplesort_getdatum(tuplesortstate, ScanDirectionIsForward(dir),
+							   &(slot->tts_values[0]), &(slot->tts_isnull[0]), NULL))
+			ExecStoreVirtualTuple(slot);
+	}
+	else
+		(void) tuplesort_gettupleslot(tuplesortstate,
+									  ScanDirectionIsForward(dir),
+									  false, slot, NULL);
+
 	return slot;
 }
 
@@ -191,6 +236,7 @@ ExecInitSort(Sort *node, EState *estate, int eflags)
 	sortstate->bounded = false;
 	sortstate->sort_Done = false;
 	sortstate->tuplesortstate = NULL;
+	sortstate->datumSort = false;
 
 	/*
 	 * Miscellaneous initialization
@@ -220,6 +266,15 @@ ExecInitSort(Sort *node, EState *estate, int eflags)
 	 */
 	ExecInitResultTupleSlotTL(&sortstate->ss.ps, &TTSOpsMinimalTuple);
 	sortstate->ss.ps.ps_ProjInfo = NULL;
+	
+	/*
+	 * We perform a Datum sort when we're sorting just a single column,
+	 * otherwise we perform a tuple sort.
+	 */
+	if (ExecGetResultType(outerPlanState(sortstate))->natts == 1)
+		sortstate->datumSort = true;
+	else
+		sortstate->datumSort = false;
 
 	SO1_printf("ExecInitSort: %s\n",
 			   "sort node initialized");
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 0ec5509e7e..89f944a9c4 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -2151,6 +2151,7 @@ typedef struct SortState
 	int64		bound_Done;		/* value of bound we did the sort with */
 	void	   *tuplesortstate; /* private state of tuplesort.c */
 	bool		am_worker;		/* are we a worker? */
+	bool		datumSort;  /* are we using the single value optimization ? */
 	SharedSortInfo *shared_info;	/* one entry per worker */
 } SortState;
 

Reply via email to