On Sat, 2020-09-05 at 12:03 -0700, Peter Geoghegan wrote: > We should totally disable the preallocation stuff for external sorts > in any case. External sorts are naturally characterized by relatively > large, distinct batching of reads and writes -- preallocation cannot > help.
Patch attached to disable preallocation for Sort. I'm still looking into the other concerns. Regards, Jeff Davis
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c index 9776263ae75..f74d4841f17 100644 --- a/src/backend/executor/nodeAgg.c +++ b/src/backend/executor/nodeAgg.c @@ -2882,7 +2882,7 @@ hashagg_tapeinfo_init(AggState *aggstate) HashTapeInfo *tapeinfo = palloc(sizeof(HashTapeInfo)); int init_tapes = 16; /* expanded dynamically */ - tapeinfo->tapeset = LogicalTapeSetCreate(init_tapes, NULL, NULL, -1); + tapeinfo->tapeset = LogicalTapeSetCreate(init_tapes, true, NULL, NULL, -1); tapeinfo->ntapes = init_tapes; tapeinfo->nfreetapes = init_tapes; tapeinfo->freetapes_alloc = init_tapes; diff --git a/src/backend/utils/sort/logtape.c b/src/backend/utils/sort/logtape.c index bbb01f6d337..8ec224b62de 100644 --- a/src/backend/utils/sort/logtape.c +++ b/src/backend/utils/sort/logtape.c @@ -212,6 +212,7 @@ struct LogicalTapeSet long *freeBlocks; /* resizable array holding minheap */ long nFreeBlocks; /* # of currently free blocks */ Size freeBlocksLen; /* current allocated length of freeBlocks[] */ + bool enable_prealloc; /* preallocate write blocks? */ /* The array of logical tapes. */ int nTapes; /* # of logical tapes in set */ @@ -220,6 +221,7 @@ struct LogicalTapeSet static void ltsWriteBlock(LogicalTapeSet *lts, long blocknum, void *buffer); static void ltsReadBlock(LogicalTapeSet *lts, long blocknum, void *buffer); +static long ltsGetBlock(LogicalTapeSet *lts, LogicalTape *lt); static long ltsGetFreeBlock(LogicalTapeSet *lts); static long ltsGetPreallocBlock(LogicalTapeSet *lts, LogicalTape *lt); static void ltsReleaseBlock(LogicalTapeSet *lts, long blocknum); @@ -373,8 +375,20 @@ parent_offset(unsigned long i) } /* - * Select the lowest currently unused block by taking the first element from - * the freelist min heap. + * Get the next block for writing. + */ +static long +ltsGetBlock(LogicalTapeSet *lts, LogicalTape *lt) +{ + if (lts->enable_prealloc) + return ltsGetPreallocBlock(lts, lt); + else + return ltsGetFreeBlock(lts); +} + +/* + * Select the lowest currently unused block from the tape set's global free + * list min heap. */ static long ltsGetFreeBlock(LogicalTapeSet *lts) @@ -430,7 +444,8 @@ ltsGetFreeBlock(LogicalTapeSet *lts) /* * Return the lowest free block number from the tape's preallocation list. - * Refill the preallocation list if necessary. + * Refill the preallocation list with blocks from the tape set's free list if + * necessary. */ static long ltsGetPreallocBlock(LogicalTapeSet *lts, LogicalTape *lt) @@ -671,8 +686,8 @@ ltsInitReadBuffer(LogicalTapeSet *lts, LogicalTape *lt) * infrastructure that may be lifted in the future. */ LogicalTapeSet * -LogicalTapeSetCreate(int ntapes, TapeShare *shared, SharedFileSet *fileset, - int worker) +LogicalTapeSetCreate(int ntapes, bool preallocate, TapeShare *shared, + SharedFileSet *fileset, int worker) { LogicalTapeSet *lts; int i; @@ -689,6 +704,7 @@ LogicalTapeSetCreate(int ntapes, TapeShare *shared, SharedFileSet *fileset, lts->freeBlocksLen = 32; /* reasonable initial guess */ lts->freeBlocks = (long *) palloc(lts->freeBlocksLen * sizeof(long)); lts->nFreeBlocks = 0; + lts->enable_prealloc = preallocate; lts->nTapes = ntapes; lts->tapes = (LogicalTape *) palloc(ntapes * sizeof(LogicalTape)); @@ -782,7 +798,7 @@ LogicalTapeWrite(LogicalTapeSet *lts, int tapenum, Assert(lt->firstBlockNumber == -1); Assert(lt->pos == 0); - lt->curBlockNumber = ltsGetPreallocBlock(lts, lt); + lt->curBlockNumber = ltsGetBlock(lts, lt); lt->firstBlockNumber = lt->curBlockNumber; TapeBlockGetTrailer(lt->buffer)->prev = -1L; @@ -806,7 +822,7 @@ LogicalTapeWrite(LogicalTapeSet *lts, int tapenum, * First allocate the next block, so that we can store it in the * 'next' pointer of this block. */ - nextBlockNumber = ltsGetPreallocBlock(lts, lt); + nextBlockNumber = ltsGetBlock(lts, lt); /* set the next-pointer and dump the current block. */ TapeBlockGetTrailer(lt->buffer)->next = nextBlockNumber; diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c index 3c49476483b..cbda911f465 100644 --- a/src/backend/utils/sort/tuplesort.c +++ b/src/backend/utils/sort/tuplesort.c @@ -2591,7 +2591,7 @@ inittapes(Tuplesortstate *state, bool mergeruns) /* Create the tape set and allocate the per-tape data arrays */ inittapestate(state, maxTapes); state->tapeset = - LogicalTapeSetCreate(maxTapes, NULL, + LogicalTapeSetCreate(maxTapes, false, NULL, state->shared ? &state->shared->fileset : NULL, state->worker); @@ -4657,8 +4657,9 @@ leader_takeover_tapes(Tuplesortstate *state) * randomAccess is disallowed for parallel sorts. */ inittapestate(state, nParticipants + 1); - state->tapeset = LogicalTapeSetCreate(nParticipants + 1, shared->tapes, - &shared->fileset, state->worker); + state->tapeset = LogicalTapeSetCreate(nParticipants + 1, false, + shared->tapes, &shared->fileset, + state->worker); /* mergeruns() relies on currentRun for # of runs (in one-pass cases) */ state->currentRun = nParticipants; diff --git a/src/include/utils/logtape.h b/src/include/utils/logtape.h index 39a99174afe..da5159e4c6c 100644 --- a/src/include/utils/logtape.h +++ b/src/include/utils/logtape.h @@ -54,7 +54,8 @@ typedef struct TapeShare * prototypes for functions in logtape.c */ -extern LogicalTapeSet *LogicalTapeSetCreate(int ntapes, TapeShare *shared, +extern LogicalTapeSet *LogicalTapeSetCreate(int ntapes, bool preallocate, + TapeShare *shared, SharedFileSet *fileset, int worker); extern void LogicalTapeSetClose(LogicalTapeSet *lts); extern void LogicalTapeSetForgetFreeSpace(LogicalTapeSet *lts);