On 10/27/25 16:14, Tomas Vondra wrote:
> On 10/21/25 16:43, Tomas Vondra wrote:
>> ...
>>
>> The results seem fairly stable, and the overall trend is clear. It'd
>> be great if there were no regressions, but considering how narrow this
>> microbenchmark is (and considering the benefits for practical COPY
>> runs), I'd say it's probably OK.
>>
>
> The regression for non-pathological cases (like COPY) is bothering me,
> even if it's a very narrow microbenchmark and I'd bet it would not be
> measurable in practice. Still, it's annoying.
>
> I wonder if maybe a better solution would be to invent a concept of
> tuple slots for the same descriptor, and allow it to be tracked only
> once in the resource owner. That'd mean no duplicates, completely
> eliminating the pathological case.
>
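For context, the duplicate entries come from the per-slot descriptor
pin: every individually created slot pins the tuple descriptor, and for
a refcounted descriptor each pin gets remembered by the current
resource owner. Roughly (paraphrasing tupdesc.c):

    void
    IncrTupleDescRefCount(TupleDesc tupdesc)
    {
        Assert(tupdesc->tdrefcount >= 0);

        ResourceOwnerEnlarge(CurrentResourceOwner);
        tupdesc->tdrefcount++;
        ResourceOwnerRememberTupleDesc(CurrentResourceOwner, tupdesc);
    }

So creating N slots for the same relation records the same descriptor N
times in the resource owner, and all those entries have to be tracked
and then released again.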
Here's a PoC of this approach. It introduces a couple of functions to
create/destroy a batch of slots, and uses that for COPY. After
allocating a batch with table_slot_batch_create(), the code can get the
next slot using table_slot_batch_next(). The descriptor is pinned only
once, in table_slot_batch_create().

The new functions have to be added at multiple "layers" (tableam,
executor), but it's mostly mechanical.

It's just a PoC and needs more thought / cleanup. E.g. some of the
duplication could be eliminated (if we allowed MakeTupleTableSlot to
skip pinning the descriptor). Also, perhaps there should be a way to
protect against freeing the batched slots individually.

Anyway, the results are unsurprising - it has the same benefits as the
earlier patches, but doesn't affect the resownerbench at all.
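To illustrate how the pieces fit together, here's a sketch of the
intended usage, mirroring what the PoC does in
CopyMultiInsertInfoNextFreeSlot and CopyMultiInsertBufferCleanup (the
"rel" and "ntuples" variables just stand in for the caller's state,
with ntuples at most MAX_BUFFERED_TUPLES):

    TupleTableSlotBatch *batch;

    /* pins the relation's tuple descriptor exactly once */
    batch = table_slot_batch_create(rel, MAX_BUFFERED_TUPLES);

    for (int i = 0; i < ntuples; i++)
    {
        /* allocates the next slot, without pinning the descriptor again */
        TupleTableSlot *slot = table_slot_batch_next(batch);

        /* ... fill the slot and hand it to the multi-insert buffer ... */
    }

    /* drops all the slots and releases the single descriptor pin */
    ExecDropTupleTableSlotBatch(batch);

regards

-- 
Tomas Vondra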
From b9110bbad383ce32d4dfe1ad03c44138c0d53c09 Mon Sep 17 00:00:00 2001
From: Tomas Vondra <[email protected]>
Date: Mon, 27 Oct 2025 19:02:10 +0100
Subject: [PATCH] introduce batch of slots

---
 src/backend/access/table/tableam.c | 36 +++++++++++++
 src/backend/commands/copyfrom.c    | 15 ++++--
 src/backend/executor/execTuples.c  | 84 ++++++++++++++++++++++++++++++
 src/include/access/tableam.h       |  3 ++
 src/include/executor/tuptable.h    | 16 ++++++
 5 files changed, 150 insertions(+), 4 deletions(-)

diff --git a/src/backend/access/table/tableam.c b/src/backend/access/table/tableam.c
index 5e41404937e..a3e67940443 100644
--- a/src/backend/access/table/tableam.c
+++ b/src/backend/access/table/tableam.c
@@ -103,6 +103,42 @@ table_slot_create(Relation relation, List **reglist)
 	return slot;
 }
 
+TupleTableSlotBatch *
+table_slot_batch_create(Relation relation, int maxslots)
+{
+	TupleTableSlotBatch *batch;
+
+	batch = palloc0(offsetof(TupleTableSlotBatch, ttsb_slots) +
+					maxslots * sizeof(TupleTableSlot *));
+
+	batch->ttsb_maxslots = maxslots;
+	batch->ttsb_nslots = 0;
+
+	/* const for optimization purposes, OK to modify at allocation time */
+	*((const TupleTableSlotOps **) &batch->ttsb_ops) = table_slot_callbacks(relation);
+	batch->ttsb_tupleDescriptor = RelationGetDescr(relation);
+
+	PinTupleDesc(batch->ttsb_tupleDescriptor);
+
+	return batch;
+}
+
+TupleTableSlot *
+table_slot_batch_next(TupleTableSlotBatch *batch)
+{
+	TupleTableSlot *slot;
+
+	if (batch->ttsb_nslots == batch->ttsb_maxslots)
+		elog(ERROR, "not enough slots in the batch");
+
+	slot = MakeSingleTupleTableSlotBatch(batch->ttsb_tupleDescriptor,
+										 batch->ttsb_ops);
+
+	batch->ttsb_slots[batch->ttsb_nslots++] = slot;
+
+	return slot;
+}
+
 
 /* ----------------------------------------------------------------------------
  * Table scan functions.
diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c
index 12781963b4f..e435b56fbe1 100644
--- a/src/backend/commands/copyfrom.c
+++ b/src/backend/commands/copyfrom.c
@@ -77,6 +77,7 @@
 /* Stores multi-insert data related to a single relation in CopyFrom. */
 typedef struct CopyMultiInsertBuffer
 {
+	TupleTableSlotBatch *batch;
 	TupleTableSlot *slots[MAX_BUFFERED_TUPLES]; /* Array to store tuples */
 	ResultRelInfo *resultRelInfo;	/* ResultRelInfo for 'relid' */
 	BulkInsertState bistate;	/* BulkInsertState for this rel if plain
@@ -369,6 +370,7 @@ CopyMultiInsertBufferInit(ResultRelInfo *rri)
 	buffer->resultRelInfo = rri;
 	buffer->bistate = (rri->ri_FdwRoutine == NULL) ? GetBulkInsertState() : NULL;
 	buffer->nused = 0;
+	buffer->batch = NULL;
 
 	return buffer;
 }
@@ -621,7 +623,7 @@ CopyMultiInsertBufferCleanup(CopyMultiInsertInfo *miinfo,
 							 CopyMultiInsertBuffer *buffer)
 {
 	ResultRelInfo *resultRelInfo = buffer->resultRelInfo;
-	int			i;
+	// int			i;
 
 	/* Ensure buffer was flushed */
 	Assert(buffer->nused == 0);
@@ -638,8 +640,10 @@ CopyMultiInsertBufferCleanup(CopyMultiInsertInfo *miinfo,
 		Assert(buffer->bistate == NULL);
 
 	/* Since we only create slots on demand, just drop the non-null ones. */
-	for (i = 0; i < MAX_BUFFERED_TUPLES && buffer->slots[i] != NULL; i++)
-		ExecDropSingleTupleTableSlot(buffer->slots[i]);
+	// for (i = 0; i < MAX_BUFFERED_TUPLES && buffer->slots[i] != NULL; i++)
+	// 	ExecDropSingleTupleTableSlot(buffer->slots[i]);
+	if (buffer->batch != NULL)
+		ExecDropTupleTableSlotBatch(buffer->batch);
 
 	if (resultRelInfo->ri_FdwRoutine == NULL)
 		table_finish_bulk_insert(resultRelInfo->ri_RelationDesc,
@@ -743,8 +747,11 @@ CopyMultiInsertInfoNextFreeSlot(CopyMultiInsertInfo *miinfo,
 
 	nused = buffer->nused;
 
+	if (buffer->batch == NULL)
+		buffer->batch = table_slot_batch_create(rri->ri_RelationDesc, MAX_BUFFERED_TUPLES);
+
 	if (buffer->slots[nused] == NULL)
-		buffer->slots[nused] = table_slot_create(rri->ri_RelationDesc, NULL);
+		buffer->slots[nused] = table_slot_batch_next(buffer->batch);
 
 	return buffer->slots[nused];
 }
diff --git a/src/backend/executor/execTuples.c b/src/backend/executor/execTuples.c
index 8e02d68824f..b1f9435ae51 100644
--- a/src/backend/executor/execTuples.c
+++ b/src/backend/executor/execTuples.c
@@ -1350,6 +1350,57 @@ MakeTupleTableSlot(TupleDesc tupleDesc,
 	return slot;
 }
 
+TupleTableSlot *
+MakeTupleTableSlotBatch(TupleDesc tupleDesc,
+						const TupleTableSlotOps *tts_ops)
+{
+	Size		basesz,
+				allocsz;
+	TupleTableSlot *slot;
+
+	basesz = tts_ops->base_slot_size;
+
+	/*
+	 * When a fixed descriptor is specified, we can reduce overhead by
+	 * allocating the entire slot in one go.
+	 */
+	if (tupleDesc)
+		allocsz = MAXALIGN(basesz) +
+			MAXALIGN(tupleDesc->natts * sizeof(Datum)) +
+			MAXALIGN(tupleDesc->natts * sizeof(bool));
+	else
+		allocsz = basesz;
+
+	slot = palloc0(allocsz);
+	/* const for optimization purposes, OK to modify at allocation time */
+	*((const TupleTableSlotOps **) &slot->tts_ops) = tts_ops;
+	slot->type = T_TupleTableSlot;
+	slot->tts_flags |= TTS_FLAG_EMPTY;
+	if (tupleDesc != NULL)
+		slot->tts_flags |= TTS_FLAG_FIXED;
+	slot->tts_tupleDescriptor = tupleDesc;
+	slot->tts_mcxt = CurrentMemoryContext;
+	slot->tts_nvalid = 0;
+
+	if (tupleDesc != NULL)
+	{
+		slot->tts_values = (Datum *)
+			(((char *) slot)
+			 + MAXALIGN(basesz));
+		slot->tts_isnull = (bool *)
+			(((char *) slot)
+			 + MAXALIGN(basesz)
+			 + MAXALIGN(tupleDesc->natts * sizeof(Datum)));
+	}
+
+	/*
+	 * And allow slot type specific initialization.
+	 */
+	slot->tts_ops->init(slot);
+
+	return slot;
+}
+
 /* --------------------------------
  *		ExecAllocTableSlot
  *
@@ -1458,6 +1509,39 @@ ExecDropSingleTupleTableSlot(TupleTableSlot *slot)
 	pfree(slot);
 }
 
+TupleTableSlot *
+MakeSingleTupleTableSlotBatch(TupleDesc tupdesc,
+							  const TupleTableSlotOps *tts_ops)
+{
+	TupleTableSlot *slot = MakeTupleTableSlotBatch(tupdesc, tts_ops);
+
+	return slot;
+}
+
+/* FIXME Does this need to match ExecResetTupleTable's processing of one slot? */
+void
+ExecDropTupleTableSlotBatch(TupleTableSlotBatch *batch)
+{
+	for (int i = 0; i < batch->ttsb_nslots; i++)
+	{
+		TupleTableSlot *slot = batch->ttsb_slots[i];
+
+		Assert(IsA(slot, TupleTableSlot));
+		ExecClearTuple(slot);
+		slot->tts_ops->release(slot);
+		if (!TTS_FIXED(slot))
+		{
+			if (slot->tts_values)
+				pfree(slot->tts_values);
+			if (slot->tts_isnull)
+				pfree(slot->tts_isnull);
+		}
+		pfree(slot);
+	}
+
+	ReleaseTupleDesc(batch->ttsb_tupleDescriptor);
+}
+
 
 /* ----------------------------------------------------------------
  *				  tuple table slot accessor functions
diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h
index e16bf025692..b9500db1129 100644
--- a/src/include/access/tableam.h
+++ b/src/include/access/tableam.h
@@ -862,6 +862,9 @@ extern const TupleTableSlotOps *table_slot_callbacks(Relation relation);
  */
 extern TupleTableSlot *table_slot_create(Relation relation, List **reglist);
 
+extern TupleTableSlotBatch *table_slot_batch_create(Relation relation, int maxslots);
+extern TupleTableSlot *table_slot_batch_next(TupleTableSlotBatch *batch);
+
 
 /* ----------------------------------------------------------------------------
  * Table scan functions.
diff --git a/src/include/executor/tuptable.h b/src/include/executor/tuptable.h
index 43f1d999b91..38a8e170c89 100644
--- a/src/include/executor/tuptable.h
+++ b/src/include/executor/tuptable.h
@@ -225,6 +225,17 @@ struct TupleTableSlotOps
 	MinimalTuple (*copy_minimal_tuple) (TupleTableSlot *slot, Size extra);
 };
 
+/* a batch of tuple table slots */
+typedef struct TupleTableSlotBatch
+{
+	NodeTag		type;
+	int			ttsb_maxslots;
+	int			ttsb_nslots;
+	const TupleTableSlotOps *const ttsb_ops;	/* implementation of slot */
+	TupleDesc	ttsb_tupleDescriptor;	/* slot's tuple descriptor */
+	TupleTableSlot *ttsb_slots[FLEXIBLE_ARRAY_MEMBER];
+} TupleTableSlotBatch;
+
 /*
  * Predefined TupleTableSlotOps for various types of TupleTableSlotOps. The
  * same are used to identify the type of a given slot.
@@ -347,6 +358,11 @@ extern void slot_getmissingattrs(TupleTableSlot *slot, int startAttNum,
 								 int lastAttNum);
 extern void slot_getsomeattrs_int(TupleTableSlot *slot, int attnum);
 
+extern TupleTableSlot *MakeTupleTableSlotBatch(TupleDesc tupleDesc,
+											   const TupleTableSlotOps *tts_ops);
+extern TupleTableSlot *MakeSingleTupleTableSlotBatch(TupleDesc tupleDesc,
+													 const TupleTableSlotOps *tts_ops);
+extern void ExecDropTupleTableSlotBatch(TupleTableSlotBatch *batch);
 
 #ifndef FRONTEND
 
-- 
2.51.0
