On 10/27/25 16:14, Tomas Vondra wrote:
> On 10/21/25 16:43, Tomas Vondra wrote:
>> ...
>>
>> The results seem fairly stable, and the overall trend is clear. It'd
>> be great if there were no regressions, but considering how narrow this
>> microbenchmark is (and considering the benefits for practical COPY
>> runs), I'd say it's probably OK.
>>
>
> The regression for non-pathological cases (like COPY) is bothering me,
> even if it's a very narrow microbenchmark and I'd bet it would not be
> measurable in practice. Still, it's annoying.
>
> I wonder if maybe a better solution would be to invent a concept of
> tuple slots for the same descriptor, and allow it to be tracked only
> once in the resource owner. That'd mean no duplicates, completely
> eliminating the pathological case.
>
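For context, the duplicate entries come from the per-slot descriptor
pin: every individually created slot pins the tuple descriptor, and for
a refcounted descriptor each pin gets remembered by the current
resource owner. Roughly (paraphrasing tupdesc.c):

    void
    IncrTupleDescRefCount(TupleDesc tupdesc)
    {
        Assert(tupdesc->tdrefcount >= 0);

        ResourceOwnerEnlarge(CurrentResourceOwner);
        tupdesc->tdrefcount++;
        ResourceOwnerRememberTupleDesc(CurrentResourceOwner, tupdesc);
    }

So creating N slots for the same relation records the same descriptor N
times in the resource owner, and all those entries have to be tracked
and then released again.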
Here's a PoC of this approach. It introduces a couple of functions to
create/destroy a batch of slots, and uses that for COPY. After
allocating a batch with table_slot_batch_create(), the code can get the
next slot using table_slot_batch_next(). The descriptor is pinned only
once, in table_slot_batch_create().

The new functions have to be added at multiple "layers" (tableam,
executor), but it's mostly mechanical.

It's just a PoC and needs more thought / cleanup. E.g. some of the
duplication could be eliminated (if we allowed MakeTupleTableSlot to
skip pinning the descriptor). Also, perhaps there should be a way to
protect against freeing the batched slots individually.

Anyway, the results are unsurprising - it has the same benefits as the
earlier patches, but doesn't affect the resownerbench at all.
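To illustrate how the pieces fit together, here's a sketch of the
intended usage, mirroring what the PoC does in
CopyMultiInsertInfoNextFreeSlot and CopyMultiInsertBufferCleanup (the
"rel" and "ntuples" variables just stand in for the caller's state,
with ntuples at most MAX_BUFFERED_TUPLES):

    TupleTableSlotBatch *batch;

    /* pins the relation's tuple descriptor exactly once */
    batch = table_slot_batch_create(rel, MAX_BUFFERED_TUPLES);

    for (int i = 0; i < ntuples; i++)
    {
        /* allocates the next slot, without pinning the descriptor again */
        TupleTableSlot *slot = table_slot_batch_next(batch);

        /* ... fill the slot and hand it to the multi-insert buffer ... */
    }

    /* drops all the slots and releases the single descriptor pin */
    ExecDropTupleTableSlotBatch(batch);

regards

-- 
Tomas Vondra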
From b9110bbad383ce32d4dfe1ad03c44138c0d53c09 Mon Sep 17 00:00:00 2001
From: Tomas Vondra <[email protected]>
Date: Mon, 27 Oct 2025 19:02:10 +0100
Subject: [PATCH] introduce batch of slots

---
 src/backend/access/table/tableam.c | 36 +++++++++++++
 src/backend/commands/copyfrom.c    | 15 ++++--
 src/backend/executor/execTuples.c  | 84 ++++++++++++++++++++++++++++++
 src/include/access/tableam.h       |  3 ++
 src/include/executor/tuptable.h    | 16 ++++++
 5 files changed, 150 insertions(+), 4 deletions(-)

diff --git a/src/backend/access/table/tableam.c b/src/backend/access/table/tableam.c
index 5e41404937e..a3e67940443 100644
--- a/src/backend/access/table/tableam.c
+++ b/src/backend/access/table/tableam.c
@@ -103,6 +103,42 @@ table_slot_create(Relation relation, List **reglist)
 	return slot;
 }
 
+TupleTableSlotBatch *
+table_slot_batch_create(Relation relation, int maxslots)
+{
+	TupleTableSlotBatch *batch;
+
+	batch = palloc0(offsetof(TupleTableSlotBatch, ttsb_slots) +
+					maxslots * sizeof(TupleTableSlot *));
+
+	batch->ttsb_maxslots = maxslots;
+	batch->ttsb_nslots = 0;
+
+	/* const for optimization purposes, OK to modify at allocation time */
+	*((const TupleTableSlotOps **) &batch->ttsb_ops) = table_slot_callbacks(relation);
+	batch->ttsb_tupleDescriptor = RelationGetDescr(relation);
+
+	PinTupleDesc(batch->ttsb_tupleDescriptor);
+
+	return batch;
+}
+
+TupleTableSlot *
+table_slot_batch_next(TupleTableSlotBatch *batch)
+{
+	TupleTableSlot *slot;
+
+	if (batch->ttsb_nslots == batch->ttsb_maxslots)
+		elog(ERROR, "not enough slots in the batch");
+
+	slot = MakeSingleTupleTableSlotBatch(batch->ttsb_tupleDescriptor,
+										 batch->ttsb_ops);
+
+	batch->ttsb_slots[batch->ttsb_nslots++] = slot;
+
+	return slot;
+}
+
 
 /* ----------------------------------------------------------------------------
  * Table scan functions.
diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c
index 12781963b4f..e435b56fbe1 100644
--- a/src/backend/commands/copyfrom.c
+++ b/src/backend/commands/copyfrom.c
@@ -77,6 +77,7 @@
 /* Stores multi-insert data related to a single relation in CopyFrom. */
 typedef struct CopyMultiInsertBuffer
 {
+	TupleTableSlotBatch *batch;
 	TupleTableSlot *slots[MAX_BUFFERED_TUPLES]; /* Array to store tuples */
 	ResultRelInfo *resultRelInfo;	/* ResultRelInfo for 'relid' */
 	BulkInsertState bistate;	/* BulkInsertState for this rel if plain
@@ -369,6 +370,7 @@ CopyMultiInsertBufferInit(ResultRelInfo *rri)
 	buffer->resultRelInfo = rri;
 	buffer->bistate = (rri->ri_FdwRoutine == NULL) ? GetBulkInsertState() : NULL;
 	buffer->nused = 0;
+	buffer->batch = NULL;
 
 	return buffer;
 }
@@ -621,7 +623,7 @@ CopyMultiInsertBufferCleanup(CopyMultiInsertInfo *miinfo,
 							 CopyMultiInsertBuffer *buffer)
 {
 	ResultRelInfo *resultRelInfo = buffer->resultRelInfo;
-	int			i;
+	// int			i;
 
 	/* Ensure buffer was flushed */
 	Assert(buffer->nused == 0);
@@ -638,8 +640,10 @@ CopyMultiInsertBufferCleanup(CopyMultiInsertInfo *miinfo,
 		Assert(buffer->bistate == NULL);
 
 	/* Since we only create slots on demand, just drop the non-null ones. */
-	for (i = 0; i < MAX_BUFFERED_TUPLES && buffer->slots[i] != NULL; i++)
-		ExecDropSingleTupleTableSlot(buffer->slots[i]);
+	// for (i = 0; i < MAX_BUFFERED_TUPLES && buffer->slots[i] != NULL; i++)
+	// 	ExecDropSingleTupleTableSlot(buffer->slots[i]);
+	if (buffer->batch != NULL)
+		ExecDropTupleTableSlotBatch(buffer->batch);
 
 	if (resultRelInfo->ri_FdwRoutine == NULL)
 		table_finish_bulk_insert(resultRelInfo->ri_RelationDesc,
@@ -743,8 +747,11 @@ CopyMultiInsertInfoNextFreeSlot(CopyMultiInsertInfo *miinfo,
 
 	nused = buffer->nused;
 
+	if (buffer->batch == NULL)
+		buffer->batch = table_slot_batch_create(rri->ri_RelationDesc, MAX_BUFFERED_TUPLES);
+
 	if (buffer->slots[nused] == NULL)
-		buffer->slots[nused] = table_slot_create(rri->ri_RelationDesc, NULL);
+		buffer->slots[nused] = table_slot_batch_next(buffer->batch);
 
 	return buffer->slots[nused];
 }
diff --git a/src/backend/executor/execTuples.c b/src/backend/executor/execTuples.c
index 8e02d68824f..b1f9435ae51 100644
--- a/src/backend/executor/execTuples.c
+++ b/src/backend/executor/execTuples.c
@@ -1350,6 +1350,57 @@ MakeTupleTableSlot(TupleDesc tupleDesc,
 	return slot;
 }
 
+TupleTableSlot *
+MakeTupleTableSlotBatch(TupleDesc tupleDesc,
+						const TupleTableSlotOps *tts_ops)
+{
+	Size		basesz,
+				allocsz;
+	TupleTableSlot *slot;
+
+	basesz = tts_ops->base_slot_size;
+
+	/*
+	 * When a fixed descriptor is specified, we can reduce overhead by
+	 * allocating the entire slot in one go.
+	 */
+	if (tupleDesc)
+		allocsz = MAXALIGN(basesz) +
+			MAXALIGN(tupleDesc->natts * sizeof(Datum)) +
+			MAXALIGN(tupleDesc->natts * sizeof(bool));
+	else
+		allocsz = basesz;
+
+	slot = palloc0(allocsz);
+	/* const for optimization purposes, OK to modify at allocation time */
+	*((const TupleTableSlotOps **) &slot->tts_ops) = tts_ops;
+	slot->type = T_TupleTableSlot;
+	slot->tts_flags |= TTS_FLAG_EMPTY;
+	if (tupleDesc != NULL)
+		slot->tts_flags |= TTS_FLAG_FIXED;
+	slot->tts_tupleDescriptor = tupleDesc;
+	slot->tts_mcxt = CurrentMemoryContext;
+	slot->tts_nvalid = 0;
+
+	if (tupleDesc != NULL)
+	{
+		slot->tts_values = (Datum *)
+			(((char *) slot)
+			 + MAXALIGN(basesz));
+		slot->tts_isnull = (bool *)
+			(((char *) slot)
+			 + MAXALIGN(basesz)
+			 + MAXALIGN(tupleDesc->natts * sizeof(Datum)));
+	}
+
+	/*
+	 * And allow slot type specific initialization.
+	 */
+	slot->tts_ops->init(slot);
+
+	return slot;
+}
+
 /* --------------------------------
  *		ExecAllocTableSlot
  *
@@ -1458,6 +1509,39 @@ ExecDropSingleTupleTableSlot(TupleTableSlot *slot)
 	pfree(slot);
 }
 
+TupleTableSlot *
+MakeSingleTupleTableSlotBatch(TupleDesc tupdesc,
+							  const TupleTableSlotOps *tts_ops)
+{
+	TupleTableSlot *slot = MakeTupleTableSlotBatch(tupdesc, tts_ops);
+
+	return slot;
+}
+
+/* FIXME Does this need to match ExecResetTupleTable's processing of one slot? */
+void
+ExecDropTupleTableSlotBatch(TupleTableSlotBatch *batch)
+{
+	for (int i = 0; i < batch->ttsb_nslots; i++)
+	{
+		TupleTableSlot *slot = batch->ttsb_slots[i];
+
+		Assert(IsA(slot, TupleTableSlot));
+		ExecClearTuple(slot);
+		slot->tts_ops->release(slot);
+		if (!TTS_FIXED(slot))
+		{
+			if (slot->tts_values)
+				pfree(slot->tts_values);
+			if (slot->tts_isnull)
+				pfree(slot->tts_isnull);
+		}
+		pfree(slot);
+	}
+
+	ReleaseTupleDesc(batch->ttsb_tupleDescriptor);
+}
+
 
 /* ----------------------------------------------------------------
  *				  tuple table slot accessor functions
diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h
index e16bf025692..b9500db1129 100644
--- a/src/include/access/tableam.h
+++ b/src/include/access/tableam.h
@@ -862,6 +862,9 @@ extern const TupleTableSlotOps *table_slot_callbacks(Relation relation);
  */
 extern TupleTableSlot *table_slot_create(Relation relation, List **reglist);
 
+extern TupleTableSlotBatch *table_slot_batch_create(Relation relation, int maxslots);
+extern TupleTableSlot *table_slot_batch_next(TupleTableSlotBatch *batch);
+
 
 /* ----------------------------------------------------------------------------
  * Table scan functions.
diff --git a/src/include/executor/tuptable.h b/src/include/executor/tuptable.h
index 43f1d999b91..38a8e170c89 100644
--- a/src/include/executor/tuptable.h
+++ b/src/include/executor/tuptable.h
@@ -225,6 +225,17 @@ struct TupleTableSlotOps
 	MinimalTuple (*copy_minimal_tuple) (TupleTableSlot *slot, Size extra);
 };
 
+/* a batch of tuple table slots */
+typedef struct TupleTableSlotBatch
+{
+	NodeTag		type;
+	int			ttsb_maxslots;
+	int			ttsb_nslots;
+	const TupleTableSlotOps *const ttsb_ops;	/* implementation of slot */
+	TupleDesc	ttsb_tupleDescriptor;	/* slot's tuple descriptor */
+	TupleTableSlot *ttsb_slots[FLEXIBLE_ARRAY_MEMBER];
+} TupleTableSlotBatch;
+
 /*
  * Predefined TupleTableSlotOps for various types of TupleTableSlotOps. The
  * same are used to identify the type of a given slot.
@@ -347,6 +358,11 @@ extern void slot_getmissingattrs(TupleTableSlot *slot, int startAttNum,
 								 int lastAttNum);
 extern void slot_getsomeattrs_int(TupleTableSlot *slot, int attnum);
 
+extern TupleTableSlot *MakeTupleTableSlotBatch(TupleDesc tupleDesc,
+											   const TupleTableSlotOps *tts_ops);
+extern TupleTableSlot *MakeSingleTupleTableSlotBatch(TupleDesc tupleDesc,
+													 const TupleTableSlotOps *tts_ops);
+extern void ExecDropTupleTableSlotBatch(TupleTableSlotBatch *batch);
 
 #ifndef FRONTEND
 
-- 
2.51.0
