Hello,

While testing the patch "Add proper planner support for ORDER BY / DISTINCT 
aggregates" [0] I discovered the performance penalty from adding a sort node 
essentially came from not using the single-datum tuplesort optimization in 
ExecSort (contrary to the sorting done in ExecAgg).

I originally proposed this patch as a companion in the same thread [1], but 
following James suggestion I'm making a separate thread just for this as the 
optimization is worthwhile independently of David's patch: it looks like we 
can expect a 2x speedup on a "select a single ordered column" case.

The patch aimed to be as simple as possible: we only turn this optimization on 
when the tuple being sorted has only one attribute, it is "byval" (so as not 
to incur copies which would be hard to track in the execution tree) and 
unbound (again, not having to deal with copying borrowed datum anywhere).

The attached patch is originally by me, with some cleanup by Ranier Vilela. 
I'm sending Ranier's version here.


[0]: https://commitfest.postgresql.org/33/3164/
[1]: https://www.postgresql.org/message-id/4480689.ObhdGn8bVM%40aivenronan

-- 
Ronan Dunklau
diff --git a/src/backend/executor/nodeSort.c b/src/backend/executor/nodeSort.c
index b99027e0d7..363f1d90f0 100644
--- a/src/backend/executor/nodeSort.c
+++ b/src/backend/executor/nodeSort.c
@@ -85,16 +85,28 @@ ExecSort(PlanState *pstate)
 
 		outerNode = outerPlanState(node);
 		tupDesc = ExecGetResultType(outerNode);
-
-		tuplesortstate = tuplesort_begin_heap(tupDesc,
-											  plannode->numCols,
-											  plannode->sortColIdx,
-											  plannode->sortOperators,
-											  plannode->collations,
-											  plannode->nullsFirst,
-											  work_mem,
-											  NULL,
-											  node->randomAccess);
+		if (unlikely(tupDesc->natts == 1 &&
+			!node->bounded &&
+			TupleDescAttr(tupDesc, 0)->attbyval))
+		{
+			node->is_single_val = true;
+			tuplesortstate = tuplesort_begin_datum(TupleDescAttr(tupDesc, 0)->atttypid,
+												   plannode->sortOperators[0],
+												   plannode->collations[0],
+												   plannode->nullsFirst[0],
+												   work_mem,
+												   NULL,
+												   node->randomAccess);
+		} else
+			tuplesortstate = tuplesort_begin_heap(tupDesc,
+												  plannode->numCols,
+												  plannode->sortColIdx,
+												  plannode->sortOperators,
+												  plannode->collations,
+												  plannode->nullsFirst,
+												  work_mem,
+												  NULL,
+												  node->randomAccess);
 		if (node->bounded)
 			tuplesort_set_bound(tuplesortstate, node->bound);
 		node->tuplesortstate = (void *) tuplesortstate;
@@ -103,15 +115,27 @@ ExecSort(PlanState *pstate)
 		 * Scan the subplan and feed all the tuples to tuplesort.
 		 */
 
-		for (;;)
-		{
-			slot = ExecProcNode(outerNode);
-
-			if (TupIsNull(slot))
-				break;
-
-			tuplesort_puttupleslot(tuplesortstate, slot);
-		}
+		if (!node->is_single_val)
+			for (;;)
+			{
+				slot = ExecProcNode(outerNode);
+
+				if (TupIsNull(slot))
+					break;
+				tuplesort_puttupleslot(tuplesortstate, slot);
+			}
+		else
+			for (;;)
+			{
+				slot = ExecProcNode(outerNode);
+
+				if (TupIsNull(slot))
+					break;
+				slot_getsomeattrs(slot, 1);
+				tuplesort_putdatum(tuplesortstate,
+								   slot->tts_values[0],
+								   slot->tts_isnull[0]);
+			}
 
 		/*
 		 * Complete the sort.
@@ -150,9 +174,18 @@ ExecSort(PlanState *pstate)
 	 * next fetch from the tuplesort.
 	 */
 	slot = node->ss.ps.ps_ResultTupleSlot;
-	(void) tuplesort_gettupleslot(tuplesortstate,
-								  ScanDirectionIsForward(dir),
-								  false, slot, NULL);
+	if (node->is_single_val)
+	{
+		ExecClearTuple(slot);
+		if (tuplesort_getdatum(tuplesortstate, ScanDirectionIsForward(dir),
+						   &(slot->tts_values[0]), &(slot->tts_isnull[0]), NULL))
+			ExecStoreVirtualTuple(slot);
+	}
+	else
+		(void) tuplesort_gettupleslot(tuplesortstate,
+									  ScanDirectionIsForward(dir),
+									  false, slot, NULL);
+
 	return slot;
 }
 
@@ -191,6 +224,7 @@ ExecInitSort(Sort *node, EState *estate, int eflags)
 	sortstate->bounded = false;
 	sortstate->sort_Done = false;
 	sortstate->tuplesortstate = NULL;
+	sortstate->is_single_val = false;
 
 	/*
 	 * Miscellaneous initialization
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 0ec5509e7e..643f416c54 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -2151,6 +2151,7 @@ typedef struct SortState
 	int64		bound_Done;		/* value of bound we did the sort with */
 	void	   *tuplesortstate; /* private state of tuplesort.c */
 	bool		am_worker;		/* are we a worker? */
+	bool		is_single_val;  /* are we using the single value optimization ? */
 	SharedSortInfo *shared_info;	/* one entry per worker */
 } SortState;
 

Reply via email to