Le jeudi 15 juillet 2021, 16:19:23 CEST David Rowley a écrit :>
> Ronan's latest results plus John's make me think there's no need to
> separate out the node function as I did in v8. However, I do think v6
> could learn a little from v8. I think I'd rather see the sort method
> determined in ExecInitSort() rather than ExecSort(). I think
> minimising those few extra instructions in ExecSort() might help the
> L1 instruction cache.
>
I'm not sure I understand what you expect from moving that to ExecInitSort ?
Maybe we should also implement the tuplesort_state initialization in
ExecInitSort ? (not the actual feeding and sorting of course).
Please find attached a v9 just moving the flag setting to ExecInitSort, and my
apologies if I misunderstood your point.
--
Ronan Dunklau
commit 2bf8db23c8ddb53f7dbcba33c60350bda9d703ea
Author: Ronan Dunklau <ronan.dunk...@aiven.io>
Date: Tue Jul 6 16:34:31 2021 +0200
Use optimized single-datum tuplesort in ExecSort
Previously, with optimization was only done in ExecAgg.
Doing this optimization on regular sort nodes provides for a nice
performance improvement, when the input / output tuple has only one
attribute.
diff --git a/src/backend/executor/nodeSort.c b/src/backend/executor/nodeSort.c
index b99027e0d7..861e4c6e9e 100644
--- a/src/backend/executor/nodeSort.c
+++ b/src/backend/executor/nodeSort.c
@@ -29,6 +29,10 @@
* which saves the results in a temporary file or memory. After the
* initial call, returns a tuple from the file with each call.
*
+ * The tuplesort can either occur on the whole tuple (this is the nominal
+ * case) or, when the input / output tuple consists of only one attribute,
+ * we switch to the tuplesort_*_datum API, optimized for that specific case.
+ *
* Conditions:
* -- none.
*
@@ -86,31 +90,63 @@ ExecSort(PlanState *pstate)
outerNode = outerPlanState(node);
tupDesc = ExecGetResultType(outerNode);
- tuplesortstate = tuplesort_begin_heap(tupDesc,
- plannode->numCols,
- plannode->sortColIdx,
- plannode->sortOperators,
- plannode->collations,
- plannode->nullsFirst,
- work_mem,
- NULL,
- node->randomAccess);
+ /*
+ * Switch to the tuplesort_*_datum interface when we have only one
+ * key, as it is much more efficient especially when the type is
+ * pass-by-value.
+ */
+ if (node->datumSort)
+ {
+ tuplesortstate = tuplesort_begin_datum(TupleDescAttr(tupDesc, 0)->atttypid,
+ plannode->sortOperators[0],
+ plannode->collations[0],
+ plannode->nullsFirst[0],
+ work_mem,
+ NULL,
+ node->randomAccess);
+ }
+ else
+ tuplesortstate = tuplesort_begin_heap(tupDesc,
+ plannode->numCols,
+ plannode->sortColIdx,
+ plannode->sortOperators,
+ plannode->collations,
+ plannode->nullsFirst,
+ work_mem,
+ NULL,
+ node->randomAccess);
if (node->bounded)
tuplesort_set_bound(tuplesortstate, node->bound);
node->tuplesortstate = (void *) tuplesortstate;
/*
- * Scan the subplan and feed all the tuples to tuplesort.
+ * Scan the subplan and feed all the tuples to tuplesort, using either
+ * the _putdatum or _puttupleslot API as appropriate.
*/
-
- for (;;)
+ if (node->datumSort)
{
- slot = ExecProcNode(outerNode);
-
- if (TupIsNull(slot))
- break;
-
- tuplesort_puttupleslot(tuplesortstate, slot);
+ for (;;)
+ {
+ slot = ExecProcNode(outerNode);
+
+ if (TupIsNull(slot))
+ break;
+ slot_getsomeattrs(slot, 1);
+ tuplesort_putdatum(tuplesortstate,
+ slot->tts_values[0],
+ slot->tts_isnull[0]);
+ }
+ }
+ else
+ {
+ for (;;)
+ {
+ slot = ExecProcNode(outerNode);
+
+ if (TupIsNull(slot))
+ break;
+ tuplesort_puttupleslot(tuplesortstate, slot);
+ }
}
/*
@@ -150,9 +186,18 @@ ExecSort(PlanState *pstate)
* next fetch from the tuplesort.
*/
slot = node->ss.ps.ps_ResultTupleSlot;
- (void) tuplesort_gettupleslot(tuplesortstate,
- ScanDirectionIsForward(dir),
- false, slot, NULL);
+ if (node->datumSort)
+ {
+ ExecClearTuple(slot);
+ if (tuplesort_getdatum(tuplesortstate, ScanDirectionIsForward(dir),
+ &(slot->tts_values[0]), &(slot->tts_isnull[0]), NULL))
+ ExecStoreVirtualTuple(slot);
+ }
+ else
+ (void) tuplesort_gettupleslot(tuplesortstate,
+ ScanDirectionIsForward(dir),
+ false, slot, NULL);
+
return slot;
}
@@ -191,6 +236,7 @@ ExecInitSort(Sort *node, EState *estate, int eflags)
sortstate->bounded = false;
sortstate->sort_Done = false;
sortstate->tuplesortstate = NULL;
+ sortstate->datumSort = false;
/*
* Miscellaneous initialization
@@ -220,6 +266,15 @@ ExecInitSort(Sort *node, EState *estate, int eflags)
*/
ExecInitResultTupleSlotTL(&sortstate->ss.ps, &TTSOpsMinimalTuple);
sortstate->ss.ps.ps_ProjInfo = NULL;
+
+ /*
+ * We perform a Datum sort when we're sorting just a single column,
+ * otherwise we perform a tuple sort.
+ */
+ if (ExecGetResultType(outerPlanState(sortstate))->natts == 1)
+ sortstate->datumSort = true;
+ else
+ sortstate->datumSort = false;
SO1_printf("ExecInitSort: %s\n",
"sort node initialized");
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 0ec5509e7e..89f944a9c4 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -2151,6 +2151,7 @@ typedef struct SortState
int64 bound_Done; /* value of bound we did the sort with */
void *tuplesortstate; /* private state of tuplesort.c */
bool am_worker; /* are we a worker? */
+ bool datumSort; /* are we using the single value optimization ? */
SharedSortInfo *shared_info; /* one entry per worker */
} SortState;