Hello,
While testing the patch "Add proper planner support for ORDER BY / DISTINCT
aggregates" [0] I discovered the performance penalty from adding a sort node
essentially came from not using the single-datum tuplesort optimization in
ExecSort (contrary to the sorting done in ExecAgg).
I originally proposed this patch as a companion in the same thread [1], but
following James suggestion I'm making a separate thread just for this as the
optimization is worthwhile independently of David's patch: it looks like we
can expect a 2x speedup on a "select a single ordered column" case.
The patch aimed to be as simple as possible: we only turn this optimization on
when the tuple being sorted has only one attribute, it is "byval" (so as not
to incur copies which would be hard to track in the execution tree) and
unbound (again, not having to deal with copying borrowed datum anywhere).
The attached patch is originally by me, with some cleanup by Ranier Vilela.
I'm sending Ranier's version here.
[0]: https://commitfest.postgresql.org/33/3164/
[1]: https://www.postgresql.org/message-id/4480689.ObhdGn8bVM%40aivenronan
--
Ronan Dunklau
diff --git a/src/backend/executor/nodeSort.c b/src/backend/executor/nodeSort.c
index b99027e0d7..363f1d90f0 100644
--- a/src/backend/executor/nodeSort.c
+++ b/src/backend/executor/nodeSort.c
@@ -85,16 +85,28 @@ ExecSort(PlanState *pstate)
outerNode = outerPlanState(node);
tupDesc = ExecGetResultType(outerNode);
-
- tuplesortstate = tuplesort_begin_heap(tupDesc,
- plannode->numCols,
- plannode->sortColIdx,
- plannode->sortOperators,
- plannode->collations,
- plannode->nullsFirst,
- work_mem,
- NULL,
- node->randomAccess);
+ if (unlikely(tupDesc->natts == 1 &&
+ !node->bounded &&
+ TupleDescAttr(tupDesc, 0)->attbyval))
+ {
+ node->is_single_val = true;
+ tuplesortstate = tuplesort_begin_datum(TupleDescAttr(tupDesc, 0)->atttypid,
+ plannode->sortOperators[0],
+ plannode->collations[0],
+ plannode->nullsFirst[0],
+ work_mem,
+ NULL,
+ node->randomAccess);
+ } else
+ tuplesortstate = tuplesort_begin_heap(tupDesc,
+ plannode->numCols,
+ plannode->sortColIdx,
+ plannode->sortOperators,
+ plannode->collations,
+ plannode->nullsFirst,
+ work_mem,
+ NULL,
+ node->randomAccess);
if (node->bounded)
tuplesort_set_bound(tuplesortstate, node->bound);
node->tuplesortstate = (void *) tuplesortstate;
@@ -103,15 +115,27 @@ ExecSort(PlanState *pstate)
* Scan the subplan and feed all the tuples to tuplesort.
*/
- for (;;)
- {
- slot = ExecProcNode(outerNode);
-
- if (TupIsNull(slot))
- break;
-
- tuplesort_puttupleslot(tuplesortstate, slot);
- }
+ if (!node->is_single_val)
+ for (;;)
+ {
+ slot = ExecProcNode(outerNode);
+
+ if (TupIsNull(slot))
+ break;
+ tuplesort_puttupleslot(tuplesortstate, slot);
+ }
+ else
+ for (;;)
+ {
+ slot = ExecProcNode(outerNode);
+
+ if (TupIsNull(slot))
+ break;
+ slot_getsomeattrs(slot, 1);
+ tuplesort_putdatum(tuplesortstate,
+ slot->tts_values[0],
+ slot->tts_isnull[0]);
+ }
/*
* Complete the sort.
@@ -150,9 +174,18 @@ ExecSort(PlanState *pstate)
* next fetch from the tuplesort.
*/
slot = node->ss.ps.ps_ResultTupleSlot;
- (void) tuplesort_gettupleslot(tuplesortstate,
- ScanDirectionIsForward(dir),
- false, slot, NULL);
+ if (node->is_single_val)
+ {
+ ExecClearTuple(slot);
+ if (tuplesort_getdatum(tuplesortstate, ScanDirectionIsForward(dir),
+ &(slot->tts_values[0]), &(slot->tts_isnull[0]), NULL))
+ ExecStoreVirtualTuple(slot);
+ }
+ else
+ (void) tuplesort_gettupleslot(tuplesortstate,
+ ScanDirectionIsForward(dir),
+ false, slot, NULL);
+
return slot;
}
@@ -191,6 +224,7 @@ ExecInitSort(Sort *node, EState *estate, int eflags)
sortstate->bounded = false;
sortstate->sort_Done = false;
sortstate->tuplesortstate = NULL;
+ sortstate->is_single_val = false;
/*
* Miscellaneous initialization
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 0ec5509e7e..643f416c54 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -2151,6 +2151,7 @@ typedef struct SortState
int64 bound_Done; /* value of bound we did the sort with */
void *tuplesortstate; /* private state of tuplesort.c */
bool am_worker; /* are we a worker? */
+ bool is_single_val; /* are we using the single value optimization ? */
SharedSortInfo *shared_info; /* one entry per worker */
} SortState;