Hi, David and I were chatting about this patch, in the context of his bump allocator patch. Attached is a rebased version that is also split up into two steps, and a bit more polished.
I wasn't sure what a good test was. I ended up measuring COPY pgbench_accounts TO '/dev/null' WITH (FORMAT 'binary'); of a scale 1 database with pgbench: c=1;pgbench -q -i -s1 && pgbench -n -c$c -j$c -t100 -f <(echo "COPY pgbench_accounts TO '/dev/null' WITH (FORMAT 'binary');") average latency HEAD: 33.865 ms 01: 32.820 ms 02: 29.934 ms The server was pinned to the one core, turbo mode disabled. That's a pretty nice win, I'd say. And I don't think this is actually the most allocator bound workload, I just tried something fairly random... Greetings, Andres Freund
>From 4b97070e32ed323ae0f083bf865717c6d271ce53 Mon Sep 17 00:00:00 2001 From: Andres Freund <and...@anarazel.de> Date: Tue, 18 Jul 2023 18:55:58 -0700 Subject: [PATCH v2 1/4] Optimize palloc() etc to allow sibling calls The reason palloc() etc previously couldn't use sibling calls is that they did check the returned value (to e.g. raise an error when the allocation fails). Push the error handling down into the memory context implementation - they can avoid performing the check at all in the hot code paths. --- src/include/nodes/memnodes.h | 4 +- src/include/utils/memutils_internal.h | 29 +++- src/backend/utils/mmgr/alignedalloc.c | 11 +- src/backend/utils/mmgr/aset.c | 23 ++-- src/backend/utils/mmgr/generation.c | 14 +- src/backend/utils/mmgr/mcxt.c | 186 ++++++-------------------- src/backend/utils/mmgr/slab.c | 12 +- 7 files changed, 102 insertions(+), 177 deletions(-) diff --git a/src/include/nodes/memnodes.h b/src/include/nodes/memnodes.h index ff6453bb7ac..47d2932e6ed 100644 --- a/src/include/nodes/memnodes.h +++ b/src/include/nodes/memnodes.h @@ -57,10 +57,10 @@ typedef void (*MemoryStatsPrintFunc) (MemoryContext context, void *passthru, typedef struct MemoryContextMethods { - void *(*alloc) (MemoryContext context, Size size); + void *(*alloc) (MemoryContext context, Size size, int flags); /* call this free_p in case someone #define's free() */ void (*free_p) (void *pointer); - void *(*realloc) (void *pointer, Size size); + void *(*realloc) (void *pointer, Size size, int flags); void (*reset) (MemoryContext context); void (*delete_context) (MemoryContext context); MemoryContext (*get_chunk_context) (void *pointer); diff --git a/src/include/utils/memutils_internal.h b/src/include/utils/memutils_internal.h index 2d107bbf9d4..3a050819263 100644 --- a/src/include/utils/memutils_internal.h +++ b/src/include/utils/memutils_internal.h @@ -19,9 +19,9 @@ #include "utils/memutils.h" /* These functions implement the MemoryContext API for AllocSet context. */ -extern void *AllocSetAlloc(MemoryContext context, Size size); +extern void *AllocSetAlloc(MemoryContext context, Size size, int flags); extern void AllocSetFree(void *pointer); -extern void *AllocSetRealloc(void *pointer, Size size); +extern void *AllocSetRealloc(void *pointer, Size size, int flags); extern void AllocSetReset(MemoryContext context); extern void AllocSetDelete(MemoryContext context); extern MemoryContext AllocSetGetChunkContext(void *pointer); @@ -36,9 +36,9 @@ extern void AllocSetCheck(MemoryContext context); #endif /* These functions implement the MemoryContext API for Generation context. */ -extern void *GenerationAlloc(MemoryContext context, Size size); +extern void *GenerationAlloc(MemoryContext context, Size size, int flags); extern void GenerationFree(void *pointer); -extern void *GenerationRealloc(void *pointer, Size size); +extern void *GenerationRealloc(void *pointer, Size size, int flags); extern void GenerationReset(MemoryContext context); extern void GenerationDelete(MemoryContext context); extern MemoryContext GenerationGetChunkContext(void *pointer); @@ -54,9 +54,9 @@ extern void GenerationCheck(MemoryContext context); /* These functions implement the MemoryContext API for Slab context. */ -extern void *SlabAlloc(MemoryContext context, Size size); +extern void *SlabAlloc(MemoryContext context, Size size, int flags); extern void SlabFree(void *pointer); -extern void *SlabRealloc(void *pointer, Size size); +extern void *SlabRealloc(void *pointer, Size size, int flags); extern void SlabReset(MemoryContext context); extern void SlabDelete(MemoryContext context); extern MemoryContext SlabGetChunkContext(void *pointer); @@ -75,7 +75,7 @@ extern void SlabCheck(MemoryContext context); * part of a fully-fledged MemoryContext type. */ extern void AlignedAllocFree(void *pointer); -extern void *AlignedAllocRealloc(void *pointer, Size size); +extern void *AlignedAllocRealloc(void *pointer, Size size, int flags); extern MemoryContext AlignedAllocGetChunkContext(void *pointer); extern Size AlignedAllocGetChunkSpace(void *pointer); @@ -133,4 +133,19 @@ extern void MemoryContextCreate(MemoryContext node, MemoryContext parent, const char *name); +extern void *MemoryContextAllocationFailure(MemoryContext context, Size size, int flags); + +extern void MemoryContextSizeFailure(MemoryContext context, Size size, int flags) pg_attribute_noreturn(); + +static inline void +MemoryContextCheckSize(MemoryContext context, Size size, int flags) +{ + if (unlikely(!AllocSizeIsValid(size))) + { + if (!(flags & MCXT_ALLOC_HUGE) || !AllocHugeSizeIsValid(size)) + MemoryContextSizeFailure(context, size, flags); + } +} + + #endif /* MEMUTILS_INTERNAL_H */ diff --git a/src/backend/utils/mmgr/alignedalloc.c b/src/backend/utils/mmgr/alignedalloc.c index 627e988852b..83c665675b8 100644 --- a/src/backend/utils/mmgr/alignedalloc.c +++ b/src/backend/utils/mmgr/alignedalloc.c @@ -57,7 +57,7 @@ AlignedAllocFree(void *pointer) * memory will be uninitialized. */ void * -AlignedAllocRealloc(void *pointer, Size size) +AlignedAllocRealloc(void *pointer, Size size, int flags) { MemoryChunk *redirchunk = PointerGetMemoryChunk(pointer); Size alignto; @@ -97,14 +97,17 @@ AlignedAllocRealloc(void *pointer, Size size) #endif ctx = GetMemoryChunkContext(unaligned); - newptr = MemoryContextAllocAligned(ctx, size, alignto, 0); + newptr = MemoryContextAllocAligned(ctx, size, alignto, flags); /* * We may memcpy beyond the end of the original allocation request size, * so we must mark the entire allocation as defined. */ - VALGRIND_MAKE_MEM_DEFINED(pointer, old_size); - memcpy(newptr, pointer, Min(size, old_size)); + if (likely(newptr != NULL)) + { + VALGRIND_MAKE_MEM_DEFINED(pointer, old_size); + memcpy(newptr, pointer, Min(size, old_size)); + } pfree(unaligned); return newptr; diff --git a/src/backend/utils/mmgr/aset.c b/src/backend/utils/mmgr/aset.c index c3affaf5a8a..b72ec68f7dd 100644 --- a/src/backend/utils/mmgr/aset.c +++ b/src/backend/utils/mmgr/aset.c @@ -700,7 +700,7 @@ AllocSetDelete(MemoryContext context) * return space that is marked NOACCESS - AllocSetRealloc has to beware! */ void * -AllocSetAlloc(MemoryContext context, Size size) +AllocSetAlloc(MemoryContext context, Size size, int flags) { AllocSet set = (AllocSet) context; AllocBlock block; @@ -717,6 +717,9 @@ AllocSetAlloc(MemoryContext context, Size size) */ if (size > set->allocChunkLimit) { + /* check size, only allocation path where the limits could be hit */ + MemoryContextCheckSize(context, size, flags); + #ifdef MEMORY_CONTEXT_CHECKING /* ensure there's always space for the sentinel byte */ chunk_size = MAXALIGN(size + 1); @@ -727,7 +730,7 @@ AllocSetAlloc(MemoryContext context, Size size) blksize = chunk_size + ALLOC_BLOCKHDRSZ + ALLOC_CHUNKHDRSZ; block = (AllocBlock) malloc(blksize); if (block == NULL) - return NULL; + return MemoryContextAllocationFailure(context, size, flags); context->mem_allocated += blksize; @@ -940,7 +943,7 @@ AllocSetAlloc(MemoryContext context, Size size) } if (block == NULL) - return NULL; + return MemoryContextAllocationFailure(context, size, flags); context->mem_allocated += blksize; @@ -1106,7 +1109,7 @@ AllocSetFree(void *pointer) * request size.) */ void * -AllocSetRealloc(void *pointer, Size size) +AllocSetRealloc(void *pointer, Size size, int flags) { AllocBlock block; AllocSet set; @@ -1139,6 +1142,9 @@ AllocSetRealloc(void *pointer, Size size) set = block->aset; + /* check size, only allocation path where the limits could be hit */ + MemoryContextCheckSize((MemoryContext) set, size, flags); + oldchksize = block->endptr - (char *) pointer; #ifdef MEMORY_CONTEXT_CHECKING @@ -1165,7 +1171,8 @@ AllocSetRealloc(void *pointer, Size size) { /* Disallow access to the chunk header. */ VALGRIND_MAKE_MEM_NOACCESS(chunk, ALLOC_CHUNKHDRSZ); - return NULL; + return MemoryContextAllocationFailure(&set->header, size, flags); + } /* updated separately, not to underflow when (oldblksize > blksize) */ @@ -1325,15 +1332,15 @@ AllocSetRealloc(void *pointer, Size size) AllocPointer newPointer; Size oldsize; - /* allocate new chunk */ - newPointer = AllocSetAlloc((MemoryContext) set, size); + /* allocate new chunk (also checks size for us) */ + newPointer = AllocSetAlloc((MemoryContext) set, size, flags); /* leave immediately if request was not completed */ if (newPointer == NULL) { /* Disallow access to the chunk header. */ VALGRIND_MAKE_MEM_NOACCESS(chunk, ALLOC_CHUNKHDRSZ); - return NULL; + return MemoryContextAllocationFailure((MemoryContext) set, size, flags); } /* diff --git a/src/backend/utils/mmgr/generation.c b/src/backend/utils/mmgr/generation.c index 92401ccf738..e932bd10277 100644 --- a/src/backend/utils/mmgr/generation.c +++ b/src/backend/utils/mmgr/generation.c @@ -342,7 +342,7 @@ GenerationDelete(MemoryContext context) * return space that is marked NOACCESS - GenerationRealloc has to beware! */ void * -GenerationAlloc(MemoryContext context, Size size) +GenerationAlloc(MemoryContext context, Size size, int flags) { GenerationContext *set = (GenerationContext *) context; GenerationBlock *block; @@ -365,9 +365,11 @@ GenerationAlloc(MemoryContext context, Size size) { Size blksize = required_size + Generation_BLOCKHDRSZ; + MemoryContextCheckSize((MemoryContext) set, size, flags); + block = (GenerationBlock *) malloc(blksize); if (block == NULL) - return NULL; + return MemoryContextAllocationFailure(context, size, flags); context->mem_allocated += blksize; @@ -470,7 +472,7 @@ GenerationAlloc(MemoryContext context, Size size) block = (GenerationBlock *) malloc(blksize); if (block == NULL) - return NULL; + return MemoryContextAllocationFailure(context, size, flags); context->mem_allocated += blksize; @@ -735,7 +737,7 @@ GenerationFree(void *pointer) * into the old chunk - in that case we just update chunk header. */ void * -GenerationRealloc(void *pointer, Size size) +GenerationRealloc(void *pointer, Size size, int flags) { MemoryChunk *chunk = PointerGetMemoryChunk(pointer); GenerationContext *set; @@ -838,14 +840,14 @@ GenerationRealloc(void *pointer, Size size) } /* allocate new chunk */ - newPointer = GenerationAlloc((MemoryContext) set, size); + newPointer = GenerationAlloc((MemoryContext) set, size, flags); /* leave immediately if request was not completed */ if (newPointer == NULL) { /* Disallow access to the chunk header. */ VALGRIND_MAKE_MEM_NOACCESS(chunk, Generation_CHUNKHDRSZ); - return NULL; + return MemoryContextAllocationFailure((MemoryContext) set, size, flags); } /* diff --git a/src/backend/utils/mmgr/mcxt.c b/src/backend/utils/mmgr/mcxt.c index 9fc83f11f6f..943f041af61 100644 --- a/src/backend/utils/mmgr/mcxt.c +++ b/src/backend/utils/mmgr/mcxt.c @@ -34,7 +34,7 @@ static void BogusFree(void *pointer); -static void *BogusRealloc(void *pointer, Size size); +static void *BogusRealloc(void *pointer, Size size, int flags); static MemoryContext BogusGetChunkContext(void *pointer); static Size BogusGetChunkSpace(void *pointer); @@ -237,7 +237,7 @@ BogusFree(void *pointer) } static void * -BogusRealloc(void *pointer, Size size) +BogusRealloc(void *pointer, Size size, int flags) { elog(ERROR, "repalloc called with invalid pointer %p (header 0x%016llx)", pointer, (unsigned long long) GetMemoryChunkHeader(pointer)); @@ -1010,6 +1010,27 @@ MemoryContextCreate(MemoryContext node, VALGRIND_CREATE_MEMPOOL(node, 0, false); } +void * +MemoryContextAllocationFailure(MemoryContext context, Size size, int flags) +{ + if ((flags & MCXT_ALLOC_NO_OOM) == 0) + { + MemoryContextStats(TopMemoryContext); + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"), + errdetail("Failed on request of size %zu in memory context \"%s\".", + size, context->name))); + } + return NULL; +} + +void +MemoryContextSizeFailure(MemoryContext context, Size size, int flags) +{ + elog(ERROR, "invalid memory alloc request size %zu", size); +} + /* * MemoryContextAlloc * Allocate space within the specified context. @@ -1025,28 +1046,9 @@ MemoryContextAlloc(MemoryContext context, Size size) Assert(MemoryContextIsValid(context)); AssertNotInCriticalSection(context); - if (!AllocSizeIsValid(size)) - elog(ERROR, "invalid memory alloc request size %zu", size); - context->isReset = false; - ret = context->methods->alloc(context, size); - if (unlikely(ret == NULL)) - { - MemoryContextStats(TopMemoryContext); - - /* - * Here, and elsewhere in this module, we show the target context's - * "name" but not its "ident" (if any) in user-visible error messages. - * The "ident" string might contain security-sensitive data, such as - * values in SQL commands. - */ - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"), - errdetail("Failed on request of size %zu in memory context \"%s\".", - size, context->name))); - } + ret = context->methods->alloc(context, size, 0); VALGRIND_MEMPOOL_ALLOC(context, ret, size); @@ -1068,24 +1070,16 @@ MemoryContextAllocZero(MemoryContext context, Size size) Assert(MemoryContextIsValid(context)); AssertNotInCriticalSection(context); - if (!AllocSizeIsValid(size)) - elog(ERROR, "invalid memory alloc request size %zu", size); - context->isReset = false; - ret = context->methods->alloc(context, size); - if (unlikely(ret == NULL)) - { - MemoryContextStats(TopMemoryContext); - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"), - errdetail("Failed on request of size %zu in memory context \"%s\".", - size, context->name))); - } + ret = context->methods->alloc(context, size, 0); VALGRIND_MEMPOOL_ALLOC(context, ret, size); + /* + * XXX: Should this also be moved into alloc()? We could possibly avoid + * zeroing in some cases (e.g. if we used mmap() ourselves. + */ MemSetAligned(ret, 0, size); return ret; @@ -1106,21 +1100,9 @@ MemoryContextAllocZeroAligned(MemoryContext context, Size size) Assert(MemoryContextIsValid(context)); AssertNotInCriticalSection(context); - if (!AllocSizeIsValid(size)) - elog(ERROR, "invalid memory alloc request size %zu", size); - context->isReset = false; - ret = context->methods->alloc(context, size); - if (unlikely(ret == NULL)) - { - MemoryContextStats(TopMemoryContext); - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"), - errdetail("Failed on request of size %zu in memory context \"%s\".", - size, context->name))); - } + ret = context->methods->alloc(context, size, 0); VALGRIND_MEMPOOL_ALLOC(context, ret, size); @@ -1147,20 +1129,9 @@ MemoryContextAllocExtended(MemoryContext context, Size size, int flags) context->isReset = false; - ret = context->methods->alloc(context, size); + ret = context->methods->alloc(context, size, flags); if (unlikely(ret == NULL)) - { - if ((flags & MCXT_ALLOC_NO_OOM) == 0) - { - MemoryContextStats(TopMemoryContext); - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"), - errdetail("Failed on request of size %zu in memory context \"%s\".", - size, context->name))); - } return NULL; - } VALGRIND_MEMPOOL_ALLOC(context, ret, size); @@ -1232,22 +1203,11 @@ palloc(Size size) Assert(MemoryContextIsValid(context)); AssertNotInCriticalSection(context); - if (!AllocSizeIsValid(size)) - elog(ERROR, "invalid memory alloc request size %zu", size); - context->isReset = false; - ret = context->methods->alloc(context, size); - if (unlikely(ret == NULL)) - { - MemoryContextStats(TopMemoryContext); - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"), - errdetail("Failed on request of size %zu in memory context \"%s\".", - size, context->name))); - } + ret = context->methods->alloc(context, size, 0); + Assert(ret != 0); VALGRIND_MEMPOOL_ALLOC(context, ret, size); return ret; @@ -1263,21 +1223,9 @@ palloc0(Size size) Assert(MemoryContextIsValid(context)); AssertNotInCriticalSection(context); - if (!AllocSizeIsValid(size)) - elog(ERROR, "invalid memory alloc request size %zu", size); - context->isReset = false; - ret = context->methods->alloc(context, size); - if (unlikely(ret == NULL)) - { - MemoryContextStats(TopMemoryContext); - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"), - errdetail("Failed on request of size %zu in memory context \"%s\".", - size, context->name))); - } + ret = context->methods->alloc(context, size, 0); VALGRIND_MEMPOOL_ALLOC(context, ret, size); @@ -1296,24 +1244,11 @@ palloc_extended(Size size, int flags) Assert(MemoryContextIsValid(context)); AssertNotInCriticalSection(context); - if (!((flags & MCXT_ALLOC_HUGE) != 0 ? AllocHugeSizeIsValid(size) : - AllocSizeIsValid(size))) - elog(ERROR, "invalid memory alloc request size %zu", size); - context->isReset = false; - ret = context->methods->alloc(context, size); + ret = context->methods->alloc(context, size, flags); if (unlikely(ret == NULL)) { - if ((flags & MCXT_ALLOC_NO_OOM) == 0) - { - MemoryContextStats(TopMemoryContext); - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"), - errdetail("Failed on request of size %zu in memory context \"%s\".", - size, context->name))); - } return NULL; } @@ -1483,29 +1418,15 @@ repalloc(void *pointer, Size size) #endif void *ret; - if (!AllocSizeIsValid(size)) - elog(ERROR, "invalid memory alloc request size %zu", size); - AssertNotInCriticalSection(context); /* isReset must be false already */ Assert(!context->isReset); - ret = MCXT_METHOD(pointer, realloc) (pointer, size); - if (unlikely(ret == NULL)) - { - MemoryContext cxt = GetMemoryChunkContext(pointer); - - MemoryContextStats(TopMemoryContext); - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"), - errdetail("Failed on request of size %zu in memory context \"%s\".", - size, cxt->name))); - } + ret = MCXT_METHOD(pointer, realloc) (pointer, size, 0); #ifdef USE_VALGRIND - if (method != MCTX_ALIGNED_REDIRECT_ID) + if (ret != NULL && method != MCTX_ALIGNED_REDIRECT_ID) VALGRIND_MEMPOOL_CHANGE(context, pointer, ret, size); #endif @@ -1525,31 +1446,14 @@ repalloc_extended(void *pointer, Size size, int flags) #endif void *ret; - if (!((flags & MCXT_ALLOC_HUGE) != 0 ? AllocHugeSizeIsValid(size) : - AllocSizeIsValid(size))) - elog(ERROR, "invalid memory alloc request size %zu", size); - AssertNotInCriticalSection(context); /* isReset must be false already */ Assert(!context->isReset); - ret = MCXT_METHOD(pointer, realloc) (pointer, size); + ret = MCXT_METHOD(pointer, realloc) (pointer, size, flags); if (unlikely(ret == NULL)) - { - if ((flags & MCXT_ALLOC_NO_OOM) == 0) - { - MemoryContext cxt = GetMemoryChunkContext(pointer); - - MemoryContextStats(TopMemoryContext); - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"), - errdetail("Failed on request of size %zu in memory context \"%s\".", - size, cxt->name))); - } return NULL; - } VALGRIND_MEMPOOL_CHANGE(context, pointer, ret, size); @@ -1590,21 +1494,9 @@ MemoryContextAllocHuge(MemoryContext context, Size size) Assert(MemoryContextIsValid(context)); AssertNotInCriticalSection(context); - if (!AllocHugeSizeIsValid(size)) - elog(ERROR, "invalid memory alloc request size %zu", size); - context->isReset = false; - ret = context->methods->alloc(context, size); - if (unlikely(ret == NULL)) - { - MemoryContextStats(TopMemoryContext); - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"), - errdetail("Failed on request of size %zu in memory context \"%s\".", - size, context->name))); - } + ret = context->methods->alloc(context, size, MCXT_ALLOC_HUGE); VALGRIND_MEMPOOL_ALLOC(context, ret, size); diff --git a/src/backend/utils/mmgr/slab.c b/src/backend/utils/mmgr/slab.c index 40c1d401c4c..9b54b9546e6 100644 --- a/src/backend/utils/mmgr/slab.c +++ b/src/backend/utils/mmgr/slab.c @@ -496,7 +496,7 @@ SlabDelete(MemoryContext context) * request could not be completed; memory is added to the slab. */ void * -SlabAlloc(MemoryContext context, Size size) +SlabAlloc(MemoryContext context, Size size, int flags) { SlabContext *slab = (SlabContext *) context; SlabBlock *block; @@ -504,6 +504,12 @@ SlabAlloc(MemoryContext context, Size size) Assert(SlabIsValid(slab)); + /* + * XXX: Probably no need to check for huge allocations, we only support + * one size? Which could theoretically be huge, but that'd not make + * sense... + */ + /* sanity check that this is pointing to a valid blocklist */ Assert(slab->curBlocklistIndex >= 0); Assert(slab->curBlocklistIndex <= SlabBlocklistIndex(slab, slab->chunksPerBlock)); @@ -546,7 +552,7 @@ SlabAlloc(MemoryContext context, Size size) block = (SlabBlock *) malloc(slab->blockSize); if (unlikely(block == NULL)) - return NULL; + return MemoryContextAllocationFailure(context, size, flags); block->slab = slab; context->mem_allocated += slab->blockSize; @@ -770,7 +776,7 @@ SlabFree(void *pointer) * realloc is usually used to enlarge the chunk. */ void * -SlabRealloc(void *pointer, Size size) +SlabRealloc(void *pointer, Size size, int flags) { MemoryChunk *chunk = PointerGetMemoryChunk(pointer); SlabBlock *block; -- 2.38.0
>From 853a494562f2765275bfdfab2dfeed05293a9502 Mon Sep 17 00:00:00 2001 From: Andres Freund <and...@anarazel.de> Date: Tue, 18 Jul 2023 20:16:18 -0700 Subject: [PATCH v2 2/4] Optimize AllocSetAlloc() by separating hot from cold paths. With gcc the common paths of AllocSetAlloc() now don't need to initialize a stack frame when compiling with gcc. --- src/backend/utils/mmgr/aset.c | 488 +++++++++++++++++++--------------- 1 file changed, 272 insertions(+), 216 deletions(-) diff --git a/src/backend/utils/mmgr/aset.c b/src/backend/utils/mmgr/aset.c index b72ec68f7dd..79e04b07c84 100644 --- a/src/backend/utils/mmgr/aset.c +++ b/src/backend/utils/mmgr/aset.c @@ -686,6 +686,260 @@ AllocSetDelete(MemoryContext context) free(set); } +/* + * Helper for AllocSetAlloc() that allocates an entire block for the chunk. + * + * AllocSetAlloc()'s comment explains why this is separate. + */ +pg_noinline +static void * +AllocSetAllocLarge(MemoryContext context, Size size, int flags) +{ + AllocSet set = (AllocSet) context; + AllocBlock block; + MemoryChunk *chunk; + Size chunk_size; + Size blksize; + + /* check size, only allocation path where the limits could be hit */ + MemoryContextCheckSize(context, size, flags); + +#ifdef MEMORY_CONTEXT_CHECKING + /* ensure there's always space for the sentinel byte */ + chunk_size = MAXALIGN(size + 1); +#else + chunk_size = MAXALIGN(size); +#endif + + blksize = chunk_size + ALLOC_BLOCKHDRSZ + ALLOC_CHUNKHDRSZ; + block = (AllocBlock) malloc(blksize); + if (block == NULL) + return MemoryContextAllocationFailure(context, size, flags); + + context->mem_allocated += blksize; + + block->aset = set; + block->freeptr = block->endptr = ((char *) block) + blksize; + + chunk = (MemoryChunk *) (((char *) block) + ALLOC_BLOCKHDRSZ); + + /* mark the MemoryChunk as externally managed */ + MemoryChunkSetHdrMaskExternal(chunk, MCTX_ASET_ID); + +#ifdef MEMORY_CONTEXT_CHECKING + chunk->requested_size = size; + /* set mark to catch clobber of "unused" space */ + Assert(size < chunk_size); + set_sentinel(MemoryChunkGetPointer(chunk), size); +#endif +#ifdef RANDOMIZE_ALLOCATED_MEMORY + /* fill the allocated space with junk */ + randomize_mem((char *) MemoryChunkGetPointer(chunk), size); +#endif + + /* + * Stick the new block underneath the active allocation block, if any, + * so that we don't lose the use of the space remaining therein. + */ + if (set->blocks != NULL) + { + block->prev = set->blocks; + block->next = set->blocks->next; + if (block->next) + block->next->prev = block; + set->blocks->next = block; + } + else + { + block->prev = NULL; + block->next = NULL; + set->blocks = block; + } + + /* Ensure any padding bytes are marked NOACCESS. */ + VALGRIND_MAKE_MEM_NOACCESS((char *) MemoryChunkGetPointer(chunk) + size, + chunk_size - size); + + /* Disallow access to the chunk header. */ + VALGRIND_MAKE_MEM_NOACCESS(chunk, ALLOC_CHUNKHDRSZ); + + return MemoryChunkGetPointer(chunk); +} + +/* + * Small helper for allocating a new chunk from a chunk, to avoid duplicating + * the code between AllocSetAlloc() and AllocSetAllocFromNewBlock(). + */ +static inline void * +AllocSetAllocChunkFromBlock(MemoryContext context, AllocBlock block, + Size size, Size chunk_size, int fidx) +{ + MemoryChunk *chunk; + + chunk = (MemoryChunk *) (block->freeptr); + + /* Prepare to initialize the chunk header. */ + VALGRIND_MAKE_MEM_UNDEFINED(chunk, ALLOC_CHUNKHDRSZ); + + block->freeptr += (chunk_size + ALLOC_CHUNKHDRSZ); + Assert(block->freeptr <= block->endptr); + + /* store the free list index in the value field */ + MemoryChunkSetHdrMask(chunk, block, fidx, MCTX_ASET_ID); + +#ifdef MEMORY_CONTEXT_CHECKING + chunk->requested_size = size; + /* set mark to catch clobber of "unused" space */ + if (size < chunk_size) + set_sentinel(MemoryChunkGetPointer(chunk), size); +#endif +#ifdef RANDOMIZE_ALLOCATED_MEMORY + /* fill the allocated space with junk */ + randomize_mem((char *) MemoryChunkGetPointer(chunk), size); +#endif + + /* Ensure any padding bytes are marked NOACCESS. */ + VALGRIND_MAKE_MEM_NOACCESS((char *) MemoryChunkGetPointer(chunk) + size, + chunk_size - size); + + /* Disallow access to the chunk header. */ + VALGRIND_MAKE_MEM_NOACCESS(chunk, ALLOC_CHUNKHDRSZ); + + return MemoryChunkGetPointer(chunk); +} + +/* + * Helper for AllocSetAlloc() that allocates a new block and returns a chunk + * allocated from it. + * + * AllocSetAlloc()'s comment explains why this is separate. + */ +pg_noinline +static void * +AllocSetAllocFromNewBlock(MemoryContext context, Size size, int flags, + int fidx) +{ + AllocSet set = (AllocSet) context; + AllocBlock block; + Size availspace; + Size blksize; + Size required_size; + Size chunk_size; + + /* due to the keeper block set->blocks should always be valid */ + Assert(set->blocks != NULL); + block = set->blocks; + availspace = block->endptr - block->freeptr; + + /* + * The existing active (top) block does not have enough room for + * the requested allocation, but it might still have a useful + * amount of space in it. Once we push it down in the block list, + * we'll never try to allocate more space from it. So, before we + * do that, carve up its free space into chunks that we can put on + * the set's freelists. + * + * Because we can only get here when there's less than + * ALLOC_CHUNK_LIMIT left in the block, this loop cannot iterate + * more than ALLOCSET_NUM_FREELISTS-1 times. + */ + while (availspace >= ((1 << ALLOC_MINBITS) + ALLOC_CHUNKHDRSZ)) + { + AllocFreeListLink *link; + Size availchunk = availspace - ALLOC_CHUNKHDRSZ; + int a_fidx = AllocSetFreeIndex(availchunk); + MemoryChunk *chunk; + + /* + * In most cases, we'll get back the index of the next larger + * freelist than the one we need to put this chunk on. The + * exception is when availchunk is exactly a power of 2. + */ + if (availchunk != GetChunkSizeFromFreeListIdx(a_fidx)) + { + a_fidx--; + Assert(a_fidx >= 0); + availchunk = GetChunkSizeFromFreeListIdx(a_fidx); + } + + chunk = (MemoryChunk *) (block->freeptr); + + /* Prepare to initialize the chunk header. */ + VALGRIND_MAKE_MEM_UNDEFINED(chunk, ALLOC_CHUNKHDRSZ); + block->freeptr += (availchunk + ALLOC_CHUNKHDRSZ); + availspace -= (availchunk + ALLOC_CHUNKHDRSZ); + + /* store the freelist index in the value field */ + MemoryChunkSetHdrMask(chunk, block, a_fidx, MCTX_ASET_ID); +#ifdef MEMORY_CONTEXT_CHECKING + chunk->requested_size = InvalidAllocSize; /* mark it free */ +#endif + /* push this chunk onto the free list */ + link = GetFreeListLink(chunk); + + VALGRIND_MAKE_MEM_DEFINED(link, sizeof(AllocFreeListLink)); + link->next = set->freelist[a_fidx]; + VALGRIND_MAKE_MEM_NOACCESS(link, sizeof(AllocFreeListLink)); + + set->freelist[a_fidx] = chunk; + } + + /* + * The first such block has size initBlockSize, and we double the + * space in each succeeding block, but not more than maxBlockSize. + */ + blksize = set->nextBlockSize; + set->nextBlockSize <<= 1; + if (set->nextBlockSize > set->maxBlockSize) + set->nextBlockSize = set->maxBlockSize; + + chunk_size = GetChunkSizeFromFreeListIdx(fidx); + + /* + * If initBlockSize is less than ALLOC_CHUNK_LIMIT, we could need more + * space... but try to keep it a power of 2. + */ + required_size = chunk_size + ALLOC_BLOCKHDRSZ + ALLOC_CHUNKHDRSZ; + while (blksize < required_size) + blksize <<= 1; + + /* Try to allocate it */ + block = (AllocBlock) malloc(blksize); + + /* + * We could be asking for pretty big blocks here, so cope if malloc + * fails. But give up if there's less than 1 MB or so available... + */ + while (block == NULL && blksize > 1024 * 1024) + { + blksize >>= 1; + if (blksize < required_size) + break; + block = (AllocBlock) malloc(blksize); + } + + if (block == NULL) + return MemoryContextAllocationFailure(context, size, flags); + + context->mem_allocated += blksize; + + block->aset = set; + block->freeptr = ((char *) block) + ALLOC_BLOCKHDRSZ; + block->endptr = ((char *) block) + blksize; + + /* Mark unallocated space NOACCESS. */ + VALGRIND_MAKE_MEM_NOACCESS(block->freeptr, + blksize - ALLOC_BLOCKHDRSZ); + + block->prev = NULL; + block->next = set->blocks; + if (block->next) + block->next->prev = block; + set->blocks = block; + + return AllocSetAllocChunkFromBlock(context, block, size, chunk_size, fidx); +} + /* * AllocSetAlloc * Returns pointer to allocated memory of given size or NULL if @@ -698,6 +952,13 @@ AllocSetDelete(MemoryContext context) * Note: when using valgrind, it doesn't matter how the returned allocation * is marked, as mcxt.c will set it to UNDEFINED. In some paths we will * return space that is marked NOACCESS - AllocSetRealloc has to beware! + * + * This function should only contain the most common code paths, everything + * else should be in pg_noinline helper functions. That allows to avoid the + * overhead of creating a stack frame for the common cases - this function is + * one of the most common bottlenecks, making this worthwhile. The helper + * functions should always directly return the newly allocated memory, + * otherwise the stack frame is required after all. */ void * AllocSetAlloc(MemoryContext context, Size size, int flags) @@ -707,80 +968,19 @@ AllocSetAlloc(MemoryContext context, Size size, int flags) MemoryChunk *chunk; int fidx; Size chunk_size; - Size blksize; + Size availspace; Assert(AllocSetIsValid(set)); + /* due to the keeper block set->blocks should always be valid */ + Assert(set->blocks != NULL); + /* * If requested size exceeds maximum for chunks, allocate an entire block * for this request. */ if (size > set->allocChunkLimit) - { - /* check size, only allocation path where the limits could be hit */ - MemoryContextCheckSize(context, size, flags); - -#ifdef MEMORY_CONTEXT_CHECKING - /* ensure there's always space for the sentinel byte */ - chunk_size = MAXALIGN(size + 1); -#else - chunk_size = MAXALIGN(size); -#endif - - blksize = chunk_size + ALLOC_BLOCKHDRSZ + ALLOC_CHUNKHDRSZ; - block = (AllocBlock) malloc(blksize); - if (block == NULL) - return MemoryContextAllocationFailure(context, size, flags); - - context->mem_allocated += blksize; - - block->aset = set; - block->freeptr = block->endptr = ((char *) block) + blksize; - - chunk = (MemoryChunk *) (((char *) block) + ALLOC_BLOCKHDRSZ); - - /* mark the MemoryChunk as externally managed */ - MemoryChunkSetHdrMaskExternal(chunk, MCTX_ASET_ID); - -#ifdef MEMORY_CONTEXT_CHECKING - chunk->requested_size = size; - /* set mark to catch clobber of "unused" space */ - Assert(size < chunk_size); - set_sentinel(MemoryChunkGetPointer(chunk), size); -#endif -#ifdef RANDOMIZE_ALLOCATED_MEMORY - /* fill the allocated space with junk */ - randomize_mem((char *) MemoryChunkGetPointer(chunk), size); -#endif - - /* - * Stick the new block underneath the active allocation block, if any, - * so that we don't lose the use of the space remaining therein. - */ - if (set->blocks != NULL) - { - block->prev = set->blocks; - block->next = set->blocks->next; - if (block->next) - block->next->prev = block; - set->blocks->next = block; - } - else - { - block->prev = NULL; - block->next = NULL; - set->blocks = block; - } - - /* Ensure any padding bytes are marked NOACCESS. */ - VALGRIND_MAKE_MEM_NOACCESS((char *) MemoryChunkGetPointer(chunk) + size, - chunk_size - size); - - /* Disallow access to the chunk header. */ - VALGRIND_MAKE_MEM_NOACCESS(chunk, ALLOC_CHUNKHDRSZ); - - return MemoryChunkGetPointer(chunk); - } + return AllocSetAllocLarge(context, size, flags); /* * Request is small enough to be treated as a chunk. Look in the @@ -837,164 +1037,20 @@ AllocSetAlloc(MemoryContext context, Size size, int flags) chunk_size = GetChunkSizeFromFreeListIdx(fidx); Assert(chunk_size >= size); + block = set->blocks; + availspace = block->endptr - block->freeptr; + /* * If there is enough room in the active allocation block, we will put the * chunk into that block. Else must start a new one. */ - if ((block = set->blocks) != NULL) - { - Size availspace = block->endptr - block->freeptr; - - if (availspace < (chunk_size + ALLOC_CHUNKHDRSZ)) - { - /* - * The existing active (top) block does not have enough room for - * the requested allocation, but it might still have a useful - * amount of space in it. Once we push it down in the block list, - * we'll never try to allocate more space from it. So, before we - * do that, carve up its free space into chunks that we can put on - * the set's freelists. - * - * Because we can only get here when there's less than - * ALLOC_CHUNK_LIMIT left in the block, this loop cannot iterate - * more than ALLOCSET_NUM_FREELISTS-1 times. - */ - while (availspace >= ((1 << ALLOC_MINBITS) + ALLOC_CHUNKHDRSZ)) - { - AllocFreeListLink *link; - Size availchunk = availspace - ALLOC_CHUNKHDRSZ; - int a_fidx = AllocSetFreeIndex(availchunk); - - /* - * In most cases, we'll get back the index of the next larger - * freelist than the one we need to put this chunk on. The - * exception is when availchunk is exactly a power of 2. - */ - if (availchunk != GetChunkSizeFromFreeListIdx(a_fidx)) - { - a_fidx--; - Assert(a_fidx >= 0); - availchunk = GetChunkSizeFromFreeListIdx(a_fidx); - } - - chunk = (MemoryChunk *) (block->freeptr); - - /* Prepare to initialize the chunk header. */ - VALGRIND_MAKE_MEM_UNDEFINED(chunk, ALLOC_CHUNKHDRSZ); - block->freeptr += (availchunk + ALLOC_CHUNKHDRSZ); - availspace -= (availchunk + ALLOC_CHUNKHDRSZ); - - /* store the freelist index in the value field */ - MemoryChunkSetHdrMask(chunk, block, a_fidx, MCTX_ASET_ID); -#ifdef MEMORY_CONTEXT_CHECKING - chunk->requested_size = InvalidAllocSize; /* mark it free */ -#endif - /* push this chunk onto the free list */ - link = GetFreeListLink(chunk); - - VALGRIND_MAKE_MEM_DEFINED(link, sizeof(AllocFreeListLink)); - link->next = set->freelist[a_fidx]; - VALGRIND_MAKE_MEM_NOACCESS(link, sizeof(AllocFreeListLink)); - - set->freelist[a_fidx] = chunk; - } - /* Mark that we need to create a new block */ - block = NULL; - } - } - - /* - * Time to create a new regular (multi-chunk) block? - */ - if (block == NULL) - { - Size required_size; - - /* - * The first such block has size initBlockSize, and we double the - * space in each succeeding block, but not more than maxBlockSize. - */ - blksize = set->nextBlockSize; - set->nextBlockSize <<= 1; - if (set->nextBlockSize > set->maxBlockSize) - set->nextBlockSize = set->maxBlockSize; - - /* - * If initBlockSize is less than ALLOC_CHUNK_LIMIT, we could need more - * space... but try to keep it a power of 2. - */ - required_size = chunk_size + ALLOC_BLOCKHDRSZ + ALLOC_CHUNKHDRSZ; - while (blksize < required_size) - blksize <<= 1; - - /* Try to allocate it */ - block = (AllocBlock) malloc(blksize); - - /* - * We could be asking for pretty big blocks here, so cope if malloc - * fails. But give up if there's less than 1 MB or so available... - */ - while (block == NULL && blksize > 1024 * 1024) - { - blksize >>= 1; - if (blksize < required_size) - break; - block = (AllocBlock) malloc(blksize); - } - - if (block == NULL) - return MemoryContextAllocationFailure(context, size, flags); - - context->mem_allocated += blksize; - - block->aset = set; - block->freeptr = ((char *) block) + ALLOC_BLOCKHDRSZ; - block->endptr = ((char *) block) + blksize; - - /* Mark unallocated space NOACCESS. */ - VALGRIND_MAKE_MEM_NOACCESS(block->freeptr, - blksize - ALLOC_BLOCKHDRSZ); - - block->prev = NULL; - block->next = set->blocks; - if (block->next) - block->next->prev = block; - set->blocks = block; - } + if (unlikely(availspace < (chunk_size + ALLOC_CHUNKHDRSZ))) + return AllocSetAllocFromNewBlock(context, size, flags, fidx); /* * OK, do the allocation */ - chunk = (MemoryChunk *) (block->freeptr); - - /* Prepare to initialize the chunk header. */ - VALGRIND_MAKE_MEM_UNDEFINED(chunk, ALLOC_CHUNKHDRSZ); - - block->freeptr += (chunk_size + ALLOC_CHUNKHDRSZ); - Assert(block->freeptr <= block->endptr); - - /* store the free list index in the value field */ - MemoryChunkSetHdrMask(chunk, block, fidx, MCTX_ASET_ID); - -#ifdef MEMORY_CONTEXT_CHECKING - chunk->requested_size = size; - /* set mark to catch clobber of "unused" space */ - if (size < chunk_size) - set_sentinel(MemoryChunkGetPointer(chunk), size); -#endif -#ifdef RANDOMIZE_ALLOCATED_MEMORY - /* fill the allocated space with junk */ - randomize_mem((char *) MemoryChunkGetPointer(chunk), size); -#endif - - /* Ensure any padding bytes are marked NOACCESS. */ - VALGRIND_MAKE_MEM_NOACCESS((char *) MemoryChunkGetPointer(chunk) + size, - chunk_size - size); - - /* Disallow access to the chunk header. */ - VALGRIND_MAKE_MEM_NOACCESS(chunk, ALLOC_CHUNKHDRSZ); - - return MemoryChunkGetPointer(chunk); + return AllocSetAllocChunkFromBlock(context, block, size, chunk_size, fidx); } /* -- 2.38.0