On Sat, 2020-09-05 at 12:03 -0700, Peter Geoghegan wrote:
> We should totally disable the preallocation stuff for external sorts
> in any case. External sorts are naturally characterized by relatively
> large, distinct batching of reads and writes -- preallocation cannot
> help.

Patch attached to disable preallocation for Sort.

I'm still looking into the other concerns.

Regards,
        Jeff Davis

diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index 9776263ae75..f74d4841f17 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -2882,7 +2882,7 @@ hashagg_tapeinfo_init(AggState *aggstate)
 	HashTapeInfo *tapeinfo = palloc(sizeof(HashTapeInfo));
 	int			init_tapes = 16;	/* expanded dynamically */
 
-	tapeinfo->tapeset = LogicalTapeSetCreate(init_tapes, NULL, NULL, -1);
+	tapeinfo->tapeset = LogicalTapeSetCreate(init_tapes, true, NULL, NULL, -1);
 	tapeinfo->ntapes = init_tapes;
 	tapeinfo->nfreetapes = init_tapes;
 	tapeinfo->freetapes_alloc = init_tapes;
diff --git a/src/backend/utils/sort/logtape.c b/src/backend/utils/sort/logtape.c
index bbb01f6d337..8ec224b62de 100644
--- a/src/backend/utils/sort/logtape.c
+++ b/src/backend/utils/sort/logtape.c
@@ -212,6 +212,7 @@ struct LogicalTapeSet
 	long	   *freeBlocks;		/* resizable array holding minheap */
 	long		nFreeBlocks;	/* # of currently free blocks */
 	Size		freeBlocksLen;	/* current allocated length of freeBlocks[] */
+	bool		enable_prealloc;	/* preallocate write blocks? */
 
 	/* The array of logical tapes. */
 	int			nTapes;			/* # of logical tapes in set */
@@ -220,6 +221,7 @@ struct LogicalTapeSet
 
 static void ltsWriteBlock(LogicalTapeSet *lts, long blocknum, void *buffer);
 static void ltsReadBlock(LogicalTapeSet *lts, long blocknum, void *buffer);
+static long ltsGetBlock(LogicalTapeSet *lts, LogicalTape *lt);
 static long ltsGetFreeBlock(LogicalTapeSet *lts);
 static long ltsGetPreallocBlock(LogicalTapeSet *lts, LogicalTape *lt);
 static void ltsReleaseBlock(LogicalTapeSet *lts, long blocknum);
@@ -373,8 +375,20 @@ parent_offset(unsigned long i)
 }
 
 /*
- * Select the lowest currently unused block by taking the first element from
- * the freelist min heap.
+ * Get the next block for writing.
+ */
+static long
+ltsGetBlock(LogicalTapeSet *lts, LogicalTape *lt)
+{
+	if (lts->enable_prealloc)
+		return ltsGetPreallocBlock(lts, lt);
+	else
+		return ltsGetFreeBlock(lts);
+}
+
+/*
+ * Select the lowest currently unused block from the tape set's global free
+ * list min heap.
  */
 static long
 ltsGetFreeBlock(LogicalTapeSet *lts)
@@ -430,7 +444,8 @@ ltsGetFreeBlock(LogicalTapeSet *lts)
 
 /*
  * Return the lowest free block number from the tape's preallocation list.
- * Refill the preallocation list if necessary.
+ * Refill the preallocation list with blocks from the tape set's free list if
+ * necessary.
  */
 static long
 ltsGetPreallocBlock(LogicalTapeSet *lts, LogicalTape *lt)
@@ -671,8 +686,8 @@ ltsInitReadBuffer(LogicalTapeSet *lts, LogicalTape *lt)
  * infrastructure that may be lifted in the future.
  */
 LogicalTapeSet *
-LogicalTapeSetCreate(int ntapes, TapeShare *shared, SharedFileSet *fileset,
-					 int worker)
+LogicalTapeSetCreate(int ntapes, bool preallocate, TapeShare *shared,
+					 SharedFileSet *fileset, int worker)
 {
 	LogicalTapeSet *lts;
 	int			i;
@@ -689,6 +704,7 @@ LogicalTapeSetCreate(int ntapes, TapeShare *shared, SharedFileSet *fileset,
 	lts->freeBlocksLen = 32;	/* reasonable initial guess */
 	lts->freeBlocks = (long *) palloc(lts->freeBlocksLen * sizeof(long));
 	lts->nFreeBlocks = 0;
+	lts->enable_prealloc = preallocate;
 	lts->nTapes = ntapes;
 	lts->tapes = (LogicalTape *) palloc(ntapes * sizeof(LogicalTape));
 
@@ -782,7 +798,7 @@ LogicalTapeWrite(LogicalTapeSet *lts, int tapenum,
 		Assert(lt->firstBlockNumber == -1);
 		Assert(lt->pos == 0);
 
-		lt->curBlockNumber = ltsGetPreallocBlock(lts, lt);
+		lt->curBlockNumber = ltsGetBlock(lts, lt);
 		lt->firstBlockNumber = lt->curBlockNumber;
 
 		TapeBlockGetTrailer(lt->buffer)->prev = -1L;
@@ -806,7 +822,7 @@ LogicalTapeWrite(LogicalTapeSet *lts, int tapenum,
 			 * First allocate the next block, so that we can store it in the
 			 * 'next' pointer of this block.
 			 */
-			nextBlockNumber = ltsGetPreallocBlock(lts, lt);
+			nextBlockNumber = ltsGetBlock(lts, lt);
 
 			/* set the next-pointer and dump the current block. */
 			TapeBlockGetTrailer(lt->buffer)->next = nextBlockNumber;
diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c
index 3c49476483b..cbda911f465 100644
--- a/src/backend/utils/sort/tuplesort.c
+++ b/src/backend/utils/sort/tuplesort.c
@@ -2591,7 +2591,7 @@ inittapes(Tuplesortstate *state, bool mergeruns)
 	/* Create the tape set and allocate the per-tape data arrays */
 	inittapestate(state, maxTapes);
 	state->tapeset =
-		LogicalTapeSetCreate(maxTapes, NULL,
+		LogicalTapeSetCreate(maxTapes, false, NULL,
 							 state->shared ? &state->shared->fileset : NULL,
 							 state->worker);
 
@@ -4657,8 +4657,9 @@ leader_takeover_tapes(Tuplesortstate *state)
 	 * randomAccess is disallowed for parallel sorts.
 	 */
 	inittapestate(state, nParticipants + 1);
-	state->tapeset = LogicalTapeSetCreate(nParticipants + 1, shared->tapes,
-										  &shared->fileset, state->worker);
+	state->tapeset = LogicalTapeSetCreate(nParticipants + 1, false,
+										  shared->tapes, &shared->fileset,
+										  state->worker);
 
 	/* mergeruns() relies on currentRun for # of runs (in one-pass cases) */
 	state->currentRun = nParticipants;
diff --git a/src/include/utils/logtape.h b/src/include/utils/logtape.h
index 39a99174afe..da5159e4c6c 100644
--- a/src/include/utils/logtape.h
+++ b/src/include/utils/logtape.h
@@ -54,7 +54,8 @@ typedef struct TapeShare
  * prototypes for functions in logtape.c
  */
 
-extern LogicalTapeSet *LogicalTapeSetCreate(int ntapes, TapeShare *shared,
+extern LogicalTapeSet *LogicalTapeSetCreate(int ntapes, bool preallocate,
+											TapeShare *shared,
 											SharedFileSet *fileset, int worker);
 extern void LogicalTapeSetClose(LogicalTapeSet *lts);
 extern void LogicalTapeSetForgetFreeSpace(LogicalTapeSet *lts);

Reply via email to