Hi,

On 2021-07-18 19:23:31 +0200, Tomas Vondra wrote:
> Sounds great! Thanks for investigating this and for the improvements.
>
> It might be good to do some experiments to see how the changes affect memory
> consumption for practical workloads. I'm willing to spend some time on that,
> if needed.
I've attached my changes. They're in rough shape right now, but I think
good enough for an initial look.

Greetings,

Andres Freund
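The gist of 0001, stripped of the memory-context machinery: keep the
allocation fast path small enough that the compiler can inline it, and
push everything rare (oversized requests, having to grab a new block,
OOM handling) behind noinline helpers. Below is a toy standalone sketch
of that shape, with made-up names (Arena etc.); it is not the patch's
code, just the pattern (alignment and block bookkeeping are ignored for
brevity):

#include <stdlib.h>

#define pg_noinline __attribute__((noinline))
#define unlikely(x) __builtin_expect((x) != 0, 0)

typedef struct Arena
{
	char	   *freeptr;		/* next free byte in the current block */
	char	   *endptr;			/* end of the current block */
	size_t		limit;			/* requests above this bypass the block */
} Arena;

/* cold path: oversized request gets its own allocation */
static pg_noinline void *
arena_alloc_large(Arena *a, size_t size)
{
	(void) a;
	return malloc(size);
}

/* cold path: start a new block, then carve the request out of it */
static pg_noinline void *
arena_alloc_new_block(Arena *a, size_t size)
{
	size_t		blksize = 8192; /* assumes limit is well below this */
	char	   *block = malloc(blksize);

	if (unlikely(block == NULL))
		return NULL;
	/* a real allocator would keep a list of blocks; the toy leaks them */
	a->freeptr = block + size;
	a->endptr = block + blksize;
	return block;
}

/* hot path: a pure bump allocation, cold branches marked unlikely */
static inline void *
arena_alloc(Arena *a, size_t size)
{
	if (unlikely(size > a->limit))
		return arena_alloc_large(a, size);
	if (unlikely(a->freeptr + size > a->endptr))
		return arena_alloc_new_block(a, size);

	a->freeptr += size;
	return a->freeptr - size;
}

With the cold paths out of line, the hot path compiles down to a couple
of comparisons and a pointer bump, which is the property the reshuffled
AllocSetAlloc() is after.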
>From af4cd1f0b64cd52d7eab342493e3dfd6b0d8388e Mon Sep 17 00:00:00 2001 From: Andres Freund <and...@anarazel.de> Date: Mon, 19 Jul 2021 12:55:51 -0700 Subject: [PATCH 1/2] WIP: optimize allocations by separating hot from cold paths. --- src/include/nodes/memnodes.h | 4 +- src/include/utils/memutils.h | 13 + src/backend/utils/mmgr/aset.c | 537 ++++++++++++++-------------- src/backend/utils/mmgr/generation.c | 22 +- src/backend/utils/mmgr/mcxt.c | 179 +++------- src/backend/utils/mmgr/slab.c | 14 +- 6 files changed, 354 insertions(+), 415 deletions(-) diff --git a/src/include/nodes/memnodes.h b/src/include/nodes/memnodes.h index e6a757d6a07..8a42d2ff999 100644 --- a/src/include/nodes/memnodes.h +++ b/src/include/nodes/memnodes.h @@ -57,10 +57,10 @@ typedef void (*MemoryStatsPrintFunc) (MemoryContext context, void *passthru, typedef struct MemoryContextMethods { - void *(*alloc) (MemoryContext context, Size size); + void *(*alloc) (MemoryContext context, Size size, int flags); /* call this free_p in case someone #define's free() */ void (*free_p) (MemoryContext context, void *pointer); - void *(*realloc) (MemoryContext context, void *pointer, Size size); + void *(*realloc) (MemoryContext context, void *pointer, Size size, int flags); void (*reset) (MemoryContext context); void (*delete_context) (MemoryContext context); Size (*get_chunk_space) (MemoryContext context, void *pointer); diff --git a/src/include/utils/memutils.h b/src/include/utils/memutils.h index ff872274d44..2f75b4cca46 100644 --- a/src/include/utils/memutils.h +++ b/src/include/utils/memutils.h @@ -147,6 +147,19 @@ extern void MemoryContextCreate(MemoryContext node, extern void HandleLogMemoryContextInterrupt(void); extern void ProcessLogMemoryContextInterrupt(void); +extern void *MemoryContextAllocationFailure(MemoryContext context, Size size, int flags); + +extern void MemoryContextSizeFailure(MemoryContext context, Size size, int flags) pg_attribute_noreturn(); + +static inline void +MemoryContextCheckSize(MemoryContext context, Size size, int flags) +{ + if (unlikely(!AllocSizeIsValid(size))) + { + if (!(flags & MCXT_ALLOC_HUGE) || !AllocHugeSizeIsValid(size)) + MemoryContextSizeFailure(context, size, flags); + } +} /* * Memory-context-type-specific functions diff --git a/src/backend/utils/mmgr/aset.c b/src/backend/utils/mmgr/aset.c index 77872e77bcd..00878354392 100644 --- a/src/backend/utils/mmgr/aset.c +++ b/src/backend/utils/mmgr/aset.c @@ -263,9 +263,9 @@ static AllocSetFreeList context_freelists[2] = /* * These functions implement the MemoryContext API for AllocSet contexts. */ -static void *AllocSetAlloc(MemoryContext context, Size size); +static void *AllocSetAlloc(MemoryContext context, Size size, int flags); static void AllocSetFree(MemoryContext context, void *pointer); -static void *AllocSetRealloc(MemoryContext context, void *pointer, Size size); +static void *AllocSetRealloc(MemoryContext context, void *pointer, Size size, int flags); static void AllocSetReset(MemoryContext context); static void AllocSetDelete(MemoryContext context); static Size AllocSetGetChunkSpace(MemoryContext context, void *pointer); @@ -704,266 +704,10 @@ AllocSetDelete(MemoryContext context) free(set); } -/* - * AllocSetAlloc - * Returns pointer to allocated memory of given size or NULL if - * request could not be completed; memory is added to the set. - * - * No request may exceed: - * MAXALIGN_DOWN(SIZE_MAX) - ALLOC_BLOCKHDRSZ - ALLOC_CHUNKHDRSZ - * All callers use a much-lower limit. 
- * - * Note: when using valgrind, it doesn't matter how the returned allocation - * is marked, as mcxt.c will set it to UNDEFINED. In some paths we will - * return space that is marked NOACCESS - AllocSetRealloc has to beware! - */ -static void * -AllocSetAlloc(MemoryContext context, Size size) +static inline void * +AllocSetAllocReturnChunk(AllocSet set, Size size, AllocChunk chunk, Size chunk_size) { - AllocSet set = (AllocSet) context; - AllocBlock block; - AllocChunk chunk; - int fidx; - Size chunk_size; - Size blksize; - - AssertArg(AllocSetIsValid(set)); - - /* - * If requested size exceeds maximum for chunks, allocate an entire block - * for this request. - */ - if (size > set->allocChunkLimit) - { - chunk_size = MAXALIGN(size); - blksize = chunk_size + ALLOC_BLOCKHDRSZ + ALLOC_CHUNKHDRSZ; - block = (AllocBlock) malloc(blksize); - if (block == NULL) - return NULL; - - context->mem_allocated += blksize; - - block->aset = set; - block->freeptr = block->endptr = ((char *) block) + blksize; - - chunk = (AllocChunk) (((char *) block) + ALLOC_BLOCKHDRSZ); - chunk->aset = set; - chunk->size = chunk_size; -#ifdef MEMORY_CONTEXT_CHECKING - chunk->requested_size = size; - /* set mark to catch clobber of "unused" space */ - if (size < chunk_size) - set_sentinel(AllocChunkGetPointer(chunk), size); -#endif -#ifdef RANDOMIZE_ALLOCATED_MEMORY - /* fill the allocated space with junk */ - randomize_mem((char *) AllocChunkGetPointer(chunk), size); -#endif - - /* - * Stick the new block underneath the active allocation block, if any, - * so that we don't lose the use of the space remaining therein. - */ - if (set->blocks != NULL) - { - block->prev = set->blocks; - block->next = set->blocks->next; - if (block->next) - block->next->prev = block; - set->blocks->next = block; - } - else - { - block->prev = NULL; - block->next = NULL; - set->blocks = block; - } - - /* Ensure any padding bytes are marked NOACCESS. */ - VALGRIND_MAKE_MEM_NOACCESS((char *) AllocChunkGetPointer(chunk) + size, - chunk_size - size); - - /* Disallow external access to private part of chunk header. */ - VALGRIND_MAKE_MEM_NOACCESS(chunk, ALLOCCHUNK_PRIVATE_LEN); - - return AllocChunkGetPointer(chunk); - } - - /* - * Request is small enough to be treated as a chunk. Look in the - * corresponding free list to see if there is a free chunk we could reuse. - * If one is found, remove it from the free list, make it again a member - * of the alloc set and return its data address. - */ - fidx = AllocSetFreeIndex(size); - chunk = set->freelist[fidx]; - if (chunk != NULL) - { - Assert(chunk->size >= size); - - set->freelist[fidx] = (AllocChunk) chunk->aset; - - chunk->aset = (void *) set; - -#ifdef MEMORY_CONTEXT_CHECKING - chunk->requested_size = size; - /* set mark to catch clobber of "unused" space */ - if (size < chunk->size) - set_sentinel(AllocChunkGetPointer(chunk), size); -#endif -#ifdef RANDOMIZE_ALLOCATED_MEMORY - /* fill the allocated space with junk */ - randomize_mem((char *) AllocChunkGetPointer(chunk), size); -#endif - - /* Ensure any padding bytes are marked NOACCESS. */ - VALGRIND_MAKE_MEM_NOACCESS((char *) AllocChunkGetPointer(chunk) + size, - chunk->size - size); - - /* Disallow external access to private part of chunk header. */ - VALGRIND_MAKE_MEM_NOACCESS(chunk, ALLOCCHUNK_PRIVATE_LEN); - - return AllocChunkGetPointer(chunk); - } - - /* - * Choose the actual chunk size to allocate. 
- */ - chunk_size = (1 << ALLOC_MINBITS) << fidx; - Assert(chunk_size >= size); - - /* - * If there is enough room in the active allocation block, we will put the - * chunk into that block. Else must start a new one. - */ - if ((block = set->blocks) != NULL) - { - Size availspace = block->endptr - block->freeptr; - - if (availspace < (chunk_size + ALLOC_CHUNKHDRSZ)) - { - /* - * The existing active (top) block does not have enough room for - * the requested allocation, but it might still have a useful - * amount of space in it. Once we push it down in the block list, - * we'll never try to allocate more space from it. So, before we - * do that, carve up its free space into chunks that we can put on - * the set's freelists. - * - * Because we can only get here when there's less than - * ALLOC_CHUNK_LIMIT left in the block, this loop cannot iterate - * more than ALLOCSET_NUM_FREELISTS-1 times. - */ - while (availspace >= ((1 << ALLOC_MINBITS) + ALLOC_CHUNKHDRSZ)) - { - Size availchunk = availspace - ALLOC_CHUNKHDRSZ; - int a_fidx = AllocSetFreeIndex(availchunk); - - /* - * In most cases, we'll get back the index of the next larger - * freelist than the one we need to put this chunk on. The - * exception is when availchunk is exactly a power of 2. - */ - if (availchunk != ((Size) 1 << (a_fidx + ALLOC_MINBITS))) - { - a_fidx--; - Assert(a_fidx >= 0); - availchunk = ((Size) 1 << (a_fidx + ALLOC_MINBITS)); - } - - chunk = (AllocChunk) (block->freeptr); - - /* Prepare to initialize the chunk header. */ - VALGRIND_MAKE_MEM_UNDEFINED(chunk, ALLOC_CHUNKHDRSZ); - - block->freeptr += (availchunk + ALLOC_CHUNKHDRSZ); - availspace -= (availchunk + ALLOC_CHUNKHDRSZ); - - chunk->size = availchunk; -#ifdef MEMORY_CONTEXT_CHECKING - chunk->requested_size = 0; /* mark it free */ -#endif - chunk->aset = (void *) set->freelist[a_fidx]; - set->freelist[a_fidx] = chunk; - } - - /* Mark that we need to create a new block */ - block = NULL; - } - } - - /* - * Time to create a new regular (multi-chunk) block? - */ - if (block == NULL) - { - Size required_size; - - /* - * The first such block has size initBlockSize, and we double the - * space in each succeeding block, but not more than maxBlockSize. - */ - blksize = set->nextBlockSize; - set->nextBlockSize <<= 1; - if (set->nextBlockSize > set->maxBlockSize) - set->nextBlockSize = set->maxBlockSize; - - /* - * If initBlockSize is less than ALLOC_CHUNK_LIMIT, we could need more - * space... but try to keep it a power of 2. - */ - required_size = chunk_size + ALLOC_BLOCKHDRSZ + ALLOC_CHUNKHDRSZ; - while (blksize < required_size) - blksize <<= 1; - - /* Try to allocate it */ - block = (AllocBlock) malloc(blksize); - - /* - * We could be asking for pretty big blocks here, so cope if malloc - * fails. But give up if there's less than 1 MB or so available... - */ - while (block == NULL && blksize > 1024 * 1024) - { - blksize >>= 1; - if (blksize < required_size) - break; - block = (AllocBlock) malloc(blksize); - } - - if (block == NULL) - return NULL; - - context->mem_allocated += blksize; - - block->aset = set; - block->freeptr = ((char *) block) + ALLOC_BLOCKHDRSZ; - block->endptr = ((char *) block) + blksize; - - /* Mark unallocated space NOACCESS. 
*/ - VALGRIND_MAKE_MEM_NOACCESS(block->freeptr, - blksize - ALLOC_BLOCKHDRSZ); - - block->prev = NULL; - block->next = set->blocks; - if (block->next) - block->next->prev = block; - set->blocks = block; - } - - /* - * OK, do the allocation - */ - chunk = (AllocChunk) (block->freeptr); - - /* Prepare to initialize the chunk header. */ - VALGRIND_MAKE_MEM_UNDEFINED(chunk, ALLOC_CHUNKHDRSZ); - - block->freeptr += (chunk_size + ALLOC_CHUNKHDRSZ); - Assert(block->freeptr <= block->endptr); - chunk->aset = (void *) set; - chunk->size = chunk_size; #ifdef MEMORY_CONTEXT_CHECKING chunk->requested_size = size; /* set mark to catch clobber of "unused" space */ @@ -985,6 +729,273 @@ AllocSetAlloc(MemoryContext context, Size size) return AllocChunkGetPointer(chunk); } +static void * pg_noinline +AllocSetAllocLarge(AllocSet set, Size size, int flags) +{ + AllocBlock block; + AllocChunk chunk; + Size chunk_size; + Size blksize; + + /* check size, only allocation path where the limits could be hit */ + MemoryContextCheckSize(&set->header, size, flags); + + AssertArg(AllocSetIsValid(set)); + + chunk_size = MAXALIGN(size); + blksize = chunk_size + ALLOC_BLOCKHDRSZ + ALLOC_CHUNKHDRSZ; + block = (AllocBlock) malloc(blksize); + if (block == NULL) + return NULL; + + set->header.mem_allocated += blksize; + + block->aset = set; + block->freeptr = block->endptr = ((char *) block) + blksize; + + /* + * Stick the new block underneath the active allocation block, if any, + * so that we don't lose the use of the space remaining therein. + */ + if (set->blocks != NULL) + { + block->prev = set->blocks; + block->next = set->blocks->next; + if (block->next) + block->next->prev = block; + set->blocks->next = block; + } + else + { + block->prev = NULL; + block->next = NULL; + set->blocks = block; + } + + chunk = (AllocChunk) (((char *) block) + ALLOC_BLOCKHDRSZ); + chunk->size = chunk_size; + + return AllocSetAllocReturnChunk(set, size, chunk, chunk_size); +} + +static void * pg_noinline +AllocSetAllocFromNewBlock(AllocSet set, Size size, Size chunk_size) +{ + AllocBlock block; + Size blksize; + Size required_size; + AllocChunk chunk; + + /* + * The first such block has size initBlockSize, and we double the + * space in each succeeding block, but not more than maxBlockSize. + */ + blksize = set->nextBlockSize; + set->nextBlockSize <<= 1; + if (set->nextBlockSize > set->maxBlockSize) + set->nextBlockSize = set->maxBlockSize; + + /* + * If initBlockSize is less than ALLOC_CHUNK_LIMIT, we could need more + * space... but try to keep it a power of 2. + */ + required_size = chunk_size + ALLOC_BLOCKHDRSZ + ALLOC_CHUNKHDRSZ; + while (blksize < required_size) + blksize <<= 1; + + /* Try to allocate it */ + block = (AllocBlock) malloc(blksize); + + /* + * We could be asking for pretty big blocks here, so cope if malloc + * fails. But give up if there's less than 1 MB or so available... + */ + while (block == NULL && blksize > 1024 * 1024) + { + blksize >>= 1; + if (blksize < required_size) + break; + block = (AllocBlock) malloc(blksize); + } + + if (block == NULL) + return NULL; + + set->header.mem_allocated += blksize; + + block->aset = set; + block->freeptr = ((char *) block) + ALLOC_BLOCKHDRSZ; + block->endptr = ((char *) block) + blksize; + + /* Mark unallocated space NOACCESS. 
*/ + VALGRIND_MAKE_MEM_NOACCESS(block->freeptr, + blksize - ALLOC_BLOCKHDRSZ); + + block->prev = NULL; + block->next = set->blocks; + if (block->next) + block->next->prev = block; + set->blocks = block; + + /* + * OK, do the allocation + */ + chunk = (AllocChunk) (block->freeptr); + + /* Prepare to initialize the chunk header. */ + VALGRIND_MAKE_MEM_UNDEFINED(chunk, ALLOC_CHUNKHDRSZ); + + block->freeptr += (chunk_size + ALLOC_CHUNKHDRSZ); + Assert(block->freeptr <= block->endptr); + + chunk->size = chunk_size; + + return AllocSetAllocReturnChunk(set, size, chunk, chunk_size); +} + +static void * pg_noinline +AllocSetAllocCarveOldAndAlloc(AllocSet set, Size size, Size chunk_size, AllocBlock block, Size availspace) +{ + AllocChunk chunk; + + /* + * The existing active (top) block does not have enough room for + * the requested allocation, but it might still have a useful + * amount of space in it. Once we push it down in the block list, + * we'll never try to allocate more space from it. So, before we + * do that, carve up its free space into chunks that we can put on + * the set's freelists. + * + * Because we can only get here when there's less than + * ALLOC_CHUNK_LIMIT left in the block, this loop cannot iterate + * more than ALLOCSET_NUM_FREELISTS-1 times. + */ + while (availspace >= ((1 << ALLOC_MINBITS) + ALLOC_CHUNKHDRSZ)) + { + Size availchunk = availspace - ALLOC_CHUNKHDRSZ; + int a_fidx = AllocSetFreeIndex(availchunk); + + /* + * In most cases, we'll get back the index of the next larger + * freelist than the one we need to put this chunk on. The + * exception is when availchunk is exactly a power of 2. + */ + if (availchunk != ((Size) 1 << (a_fidx + ALLOC_MINBITS))) + { + a_fidx--; + Assert(a_fidx >= 0); + availchunk = ((Size) 1 << (a_fidx + ALLOC_MINBITS)); + } + + chunk = (AllocChunk) (block->freeptr); + + /* Prepare to initialize the chunk header. */ + VALGRIND_MAKE_MEM_UNDEFINED(chunk, ALLOC_CHUNKHDRSZ); + + block->freeptr += (availchunk + ALLOC_CHUNKHDRSZ); + availspace -= (availchunk + ALLOC_CHUNKHDRSZ); + + chunk->size = availchunk; +#ifdef MEMORY_CONTEXT_CHECKING + chunk->requested_size = 0; /* mark it free */ +#endif + chunk->aset = (void *) set->freelist[a_fidx]; + set->freelist[a_fidx] = chunk; + } + + return AllocSetAllocFromNewBlock(set, size, chunk_size); +} + +/* + * AllocSetAlloc + * Returns pointer to allocated memory of given size or NULL if + * request could not be completed; memory is added to the set. + * + * No request may exceed: + * MAXALIGN_DOWN(SIZE_MAX) - ALLOC_BLOCKHDRSZ - ALLOC_CHUNKHDRSZ + * All callers use a much-lower limit. + * + * Note: when using valgrind, it doesn't matter how the returned allocation + * is marked, as mcxt.c will set it to UNDEFINED. In some paths we will + * return space that is marked NOACCESS - AllocSetRealloc has to beware! + */ +static void * +AllocSetAlloc(MemoryContext context, Size size, int flags) +{ + AllocSet set = (AllocSet) context; + AllocBlock block; + AllocChunk chunk; + int fidx; + Size chunk_size; + + AssertArg(AllocSetIsValid(set)); + + /* + * If requested size exceeds maximum for chunks, allocate an entire block + * for this request. + */ + if (unlikely(size > set->allocChunkLimit)) + return AllocSetAllocLarge(set, size, flags); + + /* + * Request is small enough to be treated as a chunk. Look in the + * corresponding free list to see if there is a free chunk we could reuse. + * If one is found, remove it from the free list, make it again a member + * of the alloc set and return its data address. 
+ */ + fidx = AllocSetFreeIndex(size); + chunk = set->freelist[fidx]; + if (chunk != NULL) + { + Assert(chunk->size >= size); + + set->freelist[fidx] = (AllocChunk) chunk->aset; + + return AllocSetAllocReturnChunk(set, size, chunk, chunk->size); + } + + /* + * Choose the actual chunk size to allocate. + */ + chunk_size = (1 << ALLOC_MINBITS) << fidx; + Assert(chunk_size >= size); + + /* + * If there is enough room in the active allocation block, we will put the + * chunk into that block. Else must start a new one. + */ + if ((block = set->blocks) != NULL) + { + Size availspace = block->endptr - block->freeptr; + + if (unlikely(availspace < (chunk_size + ALLOC_CHUNKHDRSZ))) + return AllocSetAllocCarveOldAndAlloc(set, size, chunk_size, + block, availspace); + } + else if (unlikely(block == NULL)) + { + /* + * Time to create a new regular (multi-chunk) block. + */ + return AllocSetAllocFromNewBlock(set, size, chunk_size); + } + + /* + * OK, do the allocation + */ + chunk = (AllocChunk) (block->freeptr); + + /* Prepare to initialize the chunk header. */ + VALGRIND_MAKE_MEM_UNDEFINED(chunk, ALLOC_CHUNKHDRSZ); + + chunk->size = chunk_size; + + block->freeptr += (chunk_size + ALLOC_CHUNKHDRSZ); + Assert(block->freeptr <= block->endptr); + + return AllocSetAllocReturnChunk(set, size, chunk, chunk_size); +} + /* * AllocSetFree * Frees allocated memory; memory is removed from the set. @@ -1072,7 +1083,7 @@ AllocSetFree(MemoryContext context, void *pointer) * request size.) */ static void * -AllocSetRealloc(MemoryContext context, void *pointer, Size size) +AllocSetRealloc(MemoryContext context, void *pointer, Size size, int flags) { AllocSet set = (AllocSet) context; AllocChunk chunk = AllocPointerGetChunk(pointer); @@ -1081,6 +1092,8 @@ AllocSetRealloc(MemoryContext context, void *pointer, Size size) /* Allow access to private part of chunk header. */ VALGRIND_MAKE_MEM_DEFINED(chunk, ALLOCCHUNK_PRIVATE_LEN); + MemoryContextCheckSize(context, size, flags); + oldsize = chunk->size; #ifdef MEMORY_CONTEXT_CHECKING @@ -1260,7 +1273,7 @@ AllocSetRealloc(MemoryContext context, void *pointer, Size size) AllocPointer newPointer; /* allocate new chunk */ - newPointer = AllocSetAlloc((MemoryContext) set, size); + newPointer = AllocSetAlloc((MemoryContext) set, size, flags); /* leave immediately if request was not completed */ if (newPointer == NULL) diff --git a/src/backend/utils/mmgr/generation.c b/src/backend/utils/mmgr/generation.c index 584cd614da0..5dde15654d7 100644 --- a/src/backend/utils/mmgr/generation.c +++ b/src/backend/utils/mmgr/generation.c @@ -146,9 +146,9 @@ struct GenerationChunk /* * These functions implement the MemoryContext API for Generation contexts. */ -static void *GenerationAlloc(MemoryContext context, Size size); +static void *GenerationAlloc(MemoryContext context, Size size, int flags); static void GenerationFree(MemoryContext context, void *pointer); -static void *GenerationRealloc(MemoryContext context, void *pointer, Size size); +static void *GenerationRealloc(MemoryContext context, void *pointer, Size size, int flags); static void GenerationReset(MemoryContext context); static void GenerationDelete(MemoryContext context); static Size GenerationGetChunkSpace(MemoryContext context, void *pointer); @@ -323,7 +323,7 @@ GenerationDelete(MemoryContext context) * return space that is marked NOACCESS - GenerationRealloc has to beware! 
*/ static void * -GenerationAlloc(MemoryContext context, Size size) +GenerationAlloc(MemoryContext context, Size size, int flags) { GenerationContext *set = (GenerationContext *) context; GenerationBlock *block; @@ -335,9 +335,12 @@ GenerationAlloc(MemoryContext context, Size size) { Size blksize = chunk_size + Generation_BLOCKHDRSZ + Generation_CHUNKHDRSZ; + /* check size, only allocation path where the limits could be hit */ + MemoryContextCheckSize(context, size, flags); + block = (GenerationBlock *) malloc(blksize); if (block == NULL) - return NULL; + return MemoryContextAllocationFailure(context, size, flags); context->mem_allocated += blksize; @@ -392,7 +395,7 @@ GenerationAlloc(MemoryContext context, Size size) block = (GenerationBlock *) malloc(blksize); if (block == NULL) - return NULL; + return MemoryContextAllocationFailure(context, size, flags); context->mem_allocated += blksize; @@ -520,13 +523,15 @@ GenerationFree(MemoryContext context, void *pointer) * into the old chunk - in that case we just update chunk header. */ static void * -GenerationRealloc(MemoryContext context, void *pointer, Size size) +GenerationRealloc(MemoryContext context, void *pointer, Size size, int flags) { GenerationContext *set = (GenerationContext *) context; GenerationChunk *chunk = GenerationPointerGetChunk(pointer); GenerationPointer newPointer; Size oldsize; + MemoryContextCheckSize(context, size, flags); + /* Allow access to private part of chunk header. */ VALGRIND_MAKE_MEM_DEFINED(chunk, GENERATIONCHUNK_PRIVATE_LEN); @@ -596,14 +601,15 @@ GenerationRealloc(MemoryContext context, void *pointer, Size size) } /* allocate new chunk */ - newPointer = GenerationAlloc((MemoryContext) set, size); + newPointer = GenerationAlloc((MemoryContext) set, size, flags); /* leave immediately if request was not completed */ if (newPointer == NULL) { /* Disallow external access to private part of chunk header. */ VALGRIND_MAKE_MEM_NOACCESS(chunk, GENERATIONCHUNK_PRIVATE_LEN); - return NULL; + /* again? */ + return MemoryContextAllocationFailure(context, size, flags); } /* diff --git a/src/backend/utils/mmgr/mcxt.c b/src/backend/utils/mmgr/mcxt.c index 6919a732804..13125c4a562 100644 --- a/src/backend/utils/mmgr/mcxt.c +++ b/src/backend/utils/mmgr/mcxt.c @@ -867,28 +867,9 @@ MemoryContextAlloc(MemoryContext context, Size size) AssertArg(MemoryContextIsValid(context)); AssertNotInCriticalSection(context); - if (!AllocSizeIsValid(size)) - elog(ERROR, "invalid memory alloc request size %zu", size); - context->isReset = false; - ret = context->methods->alloc(context, size); - if (unlikely(ret == NULL)) - { - MemoryContextStats(TopMemoryContext); - - /* - * Here, and elsewhere in this module, we show the target context's - * "name" but not its "ident" (if any) in user-visible error messages. - * The "ident" string might contain security-sensitive data, such as - * values in SQL commands. 
- */ - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"), - errdetail("Failed on request of size %zu in memory context \"%s\".", - size, context->name))); - } + ret = context->methods->alloc(context, size, 0); VALGRIND_MEMPOOL_ALLOC(context, ret, size); @@ -910,21 +891,10 @@ MemoryContextAllocZero(MemoryContext context, Size size) AssertArg(MemoryContextIsValid(context)); AssertNotInCriticalSection(context); - if (!AllocSizeIsValid(size)) - elog(ERROR, "invalid memory alloc request size %zu", size); - context->isReset = false; - ret = context->methods->alloc(context, size); - if (unlikely(ret == NULL)) - { - MemoryContextStats(TopMemoryContext); - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"), - errdetail("Failed on request of size %zu in memory context \"%s\".", - size, context->name))); - } + ret = context->methods->alloc(context, size, 0); + Assert(ret != NULL); VALGRIND_MEMPOOL_ALLOC(context, ret, size); @@ -948,21 +918,10 @@ MemoryContextAllocZeroAligned(MemoryContext context, Size size) AssertArg(MemoryContextIsValid(context)); AssertNotInCriticalSection(context); - if (!AllocSizeIsValid(size)) - elog(ERROR, "invalid memory alloc request size %zu", size); - context->isReset = false; - ret = context->methods->alloc(context, size); - if (unlikely(ret == NULL)) - { - MemoryContextStats(TopMemoryContext); - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"), - errdetail("Failed on request of size %zu in memory context \"%s\".", - size, context->name))); - } + ret = context->methods->alloc(context, size, 0); + Assert(ret != NULL); VALGRIND_MEMPOOL_ALLOC(context, ret, size); @@ -983,26 +942,11 @@ MemoryContextAllocExtended(MemoryContext context, Size size, int flags) AssertArg(MemoryContextIsValid(context)); AssertNotInCriticalSection(context); - if (((flags & MCXT_ALLOC_HUGE) != 0 && !AllocHugeSizeIsValid(size)) || - ((flags & MCXT_ALLOC_HUGE) == 0 && !AllocSizeIsValid(size))) - elog(ERROR, "invalid memory alloc request size %zu", size); - context->isReset = false; - ret = context->methods->alloc(context, size); + ret = context->methods->alloc(context, size, flags); if (unlikely(ret == NULL)) - { - if ((flags & MCXT_ALLOC_NO_OOM) == 0) - { - MemoryContextStats(TopMemoryContext); - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"), - errdetail("Failed on request of size %zu in memory context \"%s\".", - size, context->name))); - } return NULL; - } VALGRIND_MEMPOOL_ALLOC(context, ret, size); @@ -1067,22 +1011,10 @@ palloc(Size size) AssertArg(MemoryContextIsValid(context)); AssertNotInCriticalSection(context); - - if (!AllocSizeIsValid(size)) - elog(ERROR, "invalid memory alloc request size %zu", size); - context->isReset = false; - ret = context->methods->alloc(context, size); - if (unlikely(ret == NULL)) - { - MemoryContextStats(TopMemoryContext); - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"), - errdetail("Failed on request of size %zu in memory context \"%s\".", - size, context->name))); - } + ret = context->methods->alloc(context, size, 0); + Assert(ret != NULL); VALGRIND_MEMPOOL_ALLOC(context, ret, size); @@ -1099,21 +1031,10 @@ palloc0(Size size) AssertArg(MemoryContextIsValid(context)); AssertNotInCriticalSection(context); - if (!AllocSizeIsValid(size)) - elog(ERROR, "invalid memory alloc request size %zu", size); - context->isReset = false; - ret = context->methods->alloc(context, size); - if (unlikely(ret == NULL)) - { - 
MemoryContextStats(TopMemoryContext); - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"), - errdetail("Failed on request of size %zu in memory context \"%s\".", - size, context->name))); - } + ret = context->methods->alloc(context, size, 0); + Assert(ret != NULL); VALGRIND_MEMPOOL_ALLOC(context, ret, size); @@ -1132,26 +1053,11 @@ palloc_extended(Size size, int flags) AssertArg(MemoryContextIsValid(context)); AssertNotInCriticalSection(context); - if (((flags & MCXT_ALLOC_HUGE) != 0 && !AllocHugeSizeIsValid(size)) || - ((flags & MCXT_ALLOC_HUGE) == 0 && !AllocSizeIsValid(size))) - elog(ERROR, "invalid memory alloc request size %zu", size); - context->isReset = false; - ret = context->methods->alloc(context, size); + ret = context->methods->alloc(context, size, 0); if (unlikely(ret == NULL)) - { - if ((flags & MCXT_ALLOC_NO_OOM) == 0) - { - MemoryContextStats(TopMemoryContext); - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"), - errdetail("Failed on request of size %zu in memory context \"%s\".", - size, context->name))); - } return NULL; - } VALGRIND_MEMPOOL_ALLOC(context, ret, size); @@ -1184,24 +1090,13 @@ repalloc(void *pointer, Size size) MemoryContext context = GetMemoryChunkContext(pointer); void *ret; - if (!AllocSizeIsValid(size)) - elog(ERROR, "invalid memory alloc request size %zu", size); - AssertNotInCriticalSection(context); /* isReset must be false already */ Assert(!context->isReset); - ret = context->methods->realloc(context, pointer, size); - if (unlikely(ret == NULL)) - { - MemoryContextStats(TopMemoryContext); - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"), - errdetail("Failed on request of size %zu in memory context \"%s\".", - size, context->name))); - } + ret = context->methods->realloc(context, pointer, size, 0); + Assert(ret != NULL); VALGRIND_MEMPOOL_CHANGE(context, pointer, ret, size); @@ -1222,21 +1117,9 @@ MemoryContextAllocHuge(MemoryContext context, Size size) AssertArg(MemoryContextIsValid(context)); AssertNotInCriticalSection(context); - if (!AllocHugeSizeIsValid(size)) - elog(ERROR, "invalid memory alloc request size %zu", size); - context->isReset = false; - - ret = context->methods->alloc(context, size); - if (unlikely(ret == NULL)) - { - MemoryContextStats(TopMemoryContext); - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"), - errdetail("Failed on request of size %zu in memory context \"%s\".", - size, context->name))); - } + ret = context->methods->alloc(context, size, MCXT_ALLOC_HUGE); + Assert(ret != NULL); VALGRIND_MEMPOOL_ALLOC(context, ret, size); @@ -1254,16 +1137,23 @@ repalloc_huge(void *pointer, Size size) MemoryContext context = GetMemoryChunkContext(pointer); void *ret; - if (!AllocHugeSizeIsValid(size)) - elog(ERROR, "invalid memory alloc request size %zu", size); - AssertNotInCriticalSection(context); /* isReset must be false already */ Assert(!context->isReset); - ret = context->methods->realloc(context, pointer, size); - if (unlikely(ret == NULL)) + ret = context->methods->realloc(context, pointer, size, MCXT_ALLOC_HUGE); + Assert(ret != NULL); + + VALGRIND_MEMPOOL_CHANGE(context, pointer, ret, size); + + return ret; +} + +void * +MemoryContextAllocationFailure(MemoryContext context, Size size, int flags) +{ + if ((flags & MCXT_ALLOC_NO_OOM) == 0) { MemoryContextStats(TopMemoryContext); ereport(ERROR, @@ -1273,9 +1163,13 @@ repalloc_huge(void *pointer, Size size) size, context->name))); } - VALGRIND_MEMPOOL_CHANGE(context, 
pointer, ret, size); + return NULL; +} - return ret; +void +MemoryContextSizeFailure(MemoryContext context, Size size, int flags) +{ + elog(ERROR, "invalid memory alloc request size %zu", size); } /* @@ -1298,6 +1192,13 @@ MemoryContextStrdup(MemoryContext context, const char *string) char * pstrdup(const char *in) { + /* + * Here, and elsewhere in this module, we show the target context's + * "name" but not its "ident" (if any) in user-visible error messages. + * The "ident" string might contain security-sensitive data, such as + * values in SQL commands. + */ + return MemoryContextStrdup(CurrentMemoryContext, in); } diff --git a/src/backend/utils/mmgr/slab.c b/src/backend/utils/mmgr/slab.c index 553dd7f6674..e4b8275045f 100644 --- a/src/backend/utils/mmgr/slab.c +++ b/src/backend/utils/mmgr/slab.c @@ -126,9 +126,9 @@ typedef struct SlabChunk /* * These functions implement the MemoryContext API for Slab contexts. */ -static void *SlabAlloc(MemoryContext context, Size size); +static void *SlabAlloc(MemoryContext context, Size size, int flags); static void SlabFree(MemoryContext context, void *pointer); -static void *SlabRealloc(MemoryContext context, void *pointer, Size size); +static void *SlabRealloc(MemoryContext context, void *pointer, Size size, int flags); static void SlabReset(MemoryContext context); static void SlabDelete(MemoryContext context); static Size SlabGetChunkSpace(MemoryContext context, void *pointer); @@ -337,7 +337,7 @@ SlabDelete(MemoryContext context) * request could not be completed; memory is added to the slab. */ static void * -SlabAlloc(MemoryContext context, Size size) +SlabAlloc(MemoryContext context, Size size, int flags) { SlabContext *slab = castNode(SlabContext, context); SlabBlock *block; @@ -346,6 +346,12 @@ SlabAlloc(MemoryContext context, Size size) Assert(slab); + /* + * XXX: Probably no need to check for huge allocations, we only support + * one size? Which could theoretically be huge, but that'd not make + * sense... + */ + Assert((slab->minFreeChunks >= 0) && (slab->minFreeChunks < slab->chunksPerBlock)); @@ -583,7 +589,7 @@ SlabFree(MemoryContext context, void *pointer) * realloc is usually used to enlarge the chunk. */ static void * -SlabRealloc(MemoryContext context, void *pointer, Size size) +SlabRealloc(MemoryContext context, void *pointer, Size size, int flags) { SlabContext *slab = castNode(SlabContext, context); -- 2.32.0.rc2
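To restate the API change in 0001 compactly: the per-context alloc and
realloc callbacks now receive the MCXT_ALLOC_* flags, validate the
request size themselves via MemoryContextCheckSize() (and only on the
paths that can actually exceed the limits), and report OOM themselves
through MemoryContextAllocationFailure(), which ereports unless
MCXT_ALLOC_NO_OOM is set. A hypothetical backend allocator callback
following that contract would look roughly like this (ExampleAlloc and
EXAMPLE_CHUNK_LIMIT are made up; the helper calls are the ones the
patch adds, and malloc() stands in for the real block management):

#include "postgres.h"

#include "utils/memutils.h"

static void *
ExampleAlloc(MemoryContext context, Size size, int flags)
{
	void	   *ret;

	/* only the large-allocation path can hit the size limits */
	if (size > EXAMPLE_CHUNK_LIMIT)
		MemoryContextCheckSize(context, size, flags);

	ret = malloc(size);
	if (unlikely(ret == NULL))
		return MemoryContextAllocationFailure(context, size, flags);

	return ret;
}

That division of labor is what lets the wrappers in mcxt.c shrink to a
plain callback invocation, with the error reporting shared in one cold
function instead of duplicated into every caller.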
>From 8331119ab405eb8e29cb7a76ea8bfbf7a8914d68 Mon Sep 17 00:00:00 2001
From: Andres Freund <and...@anarazel.de>
Date: Mon, 19 Jul 2021 13:48:14 -0700
Subject: [PATCH 2/2] WIP: slab performance.

Discussion: https://postgr.es/m/20210717194333.mr5io3zup3kxa...@alap3.anarazel.de
---
 src/backend/utils/mmgr/slab.c | 454 ++++++++++++++++++++++------------
 1 file changed, 295 insertions(+), 159 deletions(-)

diff --git a/src/backend/utils/mmgr/slab.c b/src/backend/utils/mmgr/slab.c
index e4b8275045f..c979553dcdc 100644
--- a/src/backend/utils/mmgr/slab.c
+++ b/src/backend/utils/mmgr/slab.c
@@ -23,15 +23,18 @@
  * global (context) level. This is possible as the chunk size (and thus also
  * the number of chunks per block) is fixed.
  *
- * On each block, free chunks are tracked in a simple linked list. Contents
- * of free chunks is replaced with an index of the next free chunk, forming
- * a very simple linked list. Each block also contains a counter of free
- * chunks. Combined with the local block-level freelist, it makes it trivial
- * to eventually free the whole block.
+ * On each block, never-allocated chunks are tracked by a simple offset, and
+ * free chunks are tracked in a simple linked list. The offset approach
+ * avoids needing to iterate over all chunks when allocating a new block,
+ * which would cause page faults and cache pollution. Contents of free chunks
+ * are replaced with a pointer to the next free chunk, forming a very simple
+ * linked list. Each block also contains a counter of free chunks. Combined
+ * with the local block-level freelist, it makes it trivial to eventually
+ * free the whole block.
  *
  * At the context level, we use 'freelist' to track blocks ordered by number
  * of free chunks, starting with blocks having a single allocated chunk, and
- * with completely full blocks on the tail.
+ * with completely full blocks on the tail. XXX
  *
  * This also allows various optimizations - for example when searching for
  * free chunk, the allocator reuses space from the fullest blocks first, in
@@ -44,7 +47,7 @@
  * case this performs as if the pointer was not maintained.
  *
  * We cache the freelist index for the blocks with the fewest free chunks
- * (minFreeChunks), so that we don't have to search the freelist on every
+ * (minFreeChunksIndex), so that we don't have to search the freelist on every
  * SlabAlloc() call, which is quite expensive.
  *
  *-------------------------------------------------------------------------
@@ -56,6 +59,17 @@
 #include "utils/memdebug.h"
 #include "utils/memutils.h"
 
+struct SlabBlock;
+struct SlabChunk;
+
+/*
+ * Number of actual freelists + 1, for full blocks. Full blocks are always at
+ * offset 0.
+ */
+#define SLAB_FREELIST_COUNT 9
+
+#define SLAB_RETAIN_EMPTY_BLOCK_COUNT 10
+
 /*
  * SlabContext is a specialized implementation of MemoryContext.
*/ @@ -68,13 +82,16 @@ typedef struct SlabContext Size blockSize; /* block size */ Size headerSize; /* allocated size of context header */ int chunksPerBlock; /* number of chunks per block */ - int minFreeChunks; /* min number of free chunks in any block */ + int minFreeChunksIndex; /* min number of free chunks in any block XXX */ int nblocks; /* number of blocks allocated */ #ifdef MEMORY_CONTEXT_CHECKING bool *freechunks; /* bitmap of free chunks in a block */ #endif + int nfreeblocks; + dlist_head freeblocks; + int freelist_shift; /* blocks with free space, grouped by number of free chunks: */ - dlist_head freelist[FLEXIBLE_ARRAY_MEMBER]; + dlist_head freelist[SLAB_FREELIST_COUNT]; } SlabContext; /* @@ -83,13 +100,15 @@ typedef struct SlabContext * * node: doubly-linked list of blocks in global freelist * nfree: number of free chunks in this block - * firstFreeChunk: index of the first free chunk + * firstFreeChunk: first free chunk */ typedef struct SlabBlock { dlist_node node; /* doubly-linked list */ - int nfree; /* number of free chunks */ - int firstFreeChunk; /* index of the first free chunk in the block */ + int nfree; /* number of chunks on freelist + unused */ + int nunused; /* number of unused chunks */ + struct SlabChunk *unused; /* */ + struct SlabChunk *firstFreeChunk; /* first free chunk in the block */ } SlabBlock; /* @@ -123,6 +142,35 @@ typedef struct SlabChunk #define SlabChunkIndex(slab, block, chunk) \ (((char *) chunk - SlabBlockStart(block)) / slab->fullChunkSize) +static inline uint8 +SlabFreelistIndex(SlabContext *slab, int freecount) +{ + uint8 index; + + Assert(freecount <= slab->chunksPerBlock); + + /* + * Ensure, without a branch, that index 0 is only used for blocks entirely + * without free chunks. + * XXX: There probably is a cheaper way to do this. Needing to shift twice + * by slab->freelist_shift isn't great. + */ + index = (freecount + (1 << slab->freelist_shift) - 1) >> slab->freelist_shift; + + if (freecount == 0) + Assert(index == 0); + else + Assert(index > 0 && index < (SLAB_FREELIST_COUNT)); + + return index; +} + +static inline dlist_head* +SlabFreelist(SlabContext *slab, int freecount) +{ + return &slab->freelist[SlabFreelistIndex(slab, freecount)]; +} + /* * These functions implement the MemoryContext API for Slab contexts. */ @@ -179,7 +227,6 @@ SlabContextCreate(MemoryContext parent, { int chunksPerBlock; Size fullChunkSize; - Size freelistSize; Size headerSize; SlabContext *slab; int i; @@ -192,11 +239,11 @@ SlabContextCreate(MemoryContext parent, "padding calculation in SlabChunk is wrong"); /* Make sure the linked list node fits inside a freed chunk */ - if (chunkSize < sizeof(int)) - chunkSize = sizeof(int); + if (chunkSize < MAXALIGN(sizeof(void *))) + chunkSize = MAXALIGN(sizeof(void *)); /* chunk, including SLAB header (both addresses nicely aligned) */ - fullChunkSize = sizeof(SlabChunk) + MAXALIGN(chunkSize); + fullChunkSize = sizeof(SlabChunk) + chunkSize; /* Make sure the block can store at least one chunk. */ if (blockSize < fullChunkSize + sizeof(SlabBlock)) @@ -206,16 +253,14 @@ SlabContextCreate(MemoryContext parent, /* Compute maximum number of chunks per block */ chunksPerBlock = (blockSize - sizeof(SlabBlock)) / fullChunkSize; - /* The freelist starts with 0, ends with chunksPerBlock. */ - freelistSize = sizeof(dlist_head) * (chunksPerBlock + 1); - /* * Allocate the context header. Unlike aset.c, we never try to combine * this with the first regular block; not worth the extra complication. 
+	 * XXX: What's the evidence for that?
 	 */
 
 	/* Size of the memory context header */
-	headerSize = offsetof(SlabContext, freelist) + freelistSize;
+	headerSize = sizeof(SlabContext);
 
 #ifdef MEMORY_CONTEXT_CHECKING
@@ -249,17 +294,34 @@
 	slab->blockSize = blockSize;
 	slab->headerSize = headerSize;
 	slab->chunksPerBlock = chunksPerBlock;
-	slab->minFreeChunks = 0;
+	slab->minFreeChunksIndex = 0;
 	slab->nblocks = 0;
+	slab->nfreeblocks = 0;
+
+	/*
+	 * Compute a shift that guarantees that shifting chunksPerBlock with it
+	 * yields a value smaller than SLAB_FREELIST_COUNT - 1 (one freelist is
+	 * used for full blocks).
+	 */
+	slab->freelist_shift = 0;
+	while ((slab->chunksPerBlock >> slab->freelist_shift) >= (SLAB_FREELIST_COUNT - 1))
+		slab->freelist_shift++;
+
+#if 0
+	elog(LOG, "freelist shift for %d chunks of size %zu is %d, block size %zu",
+		 slab->chunksPerBlock, slab->fullChunkSize, slab->freelist_shift,
+		 slab->blockSize);
+#endif
+
+	dlist_init(&slab->freeblocks);
 
 	/* initialize the freelist slots */
-	for (i = 0; i < (slab->chunksPerBlock + 1); i++)
+	for (i = 0; i < SLAB_FREELIST_COUNT; i++)
 		dlist_init(&slab->freelist[i]);
 
 #ifdef MEMORY_CONTEXT_CHECKING
 	/* set the freechunks pointer right after the freelists array */
 	slab->freechunks
-		= (bool *) slab + offsetof(SlabContext, freelist) + freelistSize;
+		= (bool *) slab + sizeof(SlabContext);
 #endif
 
 	/* Finally, do the type-independent part of context creation */
@@ -292,8 +354,27 @@ SlabReset(MemoryContext context)
 	SlabCheck(context);
 #endif
 
+	/* release retained empty blocks */
+	{
+		dlist_mutable_iter miter;
+
+		dlist_foreach_modify(miter, &slab->freeblocks)
+		{
+			SlabBlock  *block = dlist_container(SlabBlock, node, miter.cur);
+
+			dlist_delete(miter.cur);
+
+#ifdef CLOBBER_FREED_MEMORY
+			wipe_mem(block, slab->blockSize);
+#endif
+			free(block);
+			slab->nblocks--;
+			context->mem_allocated -= slab->blockSize;
+		}
+	}
+
 	/* walk over freelists and free the blocks */
-	for (i = 0; i <= slab->chunksPerBlock; i++)
+	for (i = 0; i < SLAB_FREELIST_COUNT; i++)
 	{
 		dlist_mutable_iter miter;
 
@@ -312,7 +393,7 @@ SlabReset(MemoryContext context)
 		}
 	}
 
-	slab->minFreeChunks = 0;
+	slab->minFreeChunksIndex = 0;
 
 	Assert(slab->nblocks == 0);
 	Assert(context->mem_allocated == 0);
@@ -342,7 +423,6 @@ SlabAlloc(MemoryContext context, Size size, int flags)
 	SlabContext *slab = castNode(SlabContext, context);
 	SlabBlock  *block;
 	SlabChunk  *chunk;
-	int			idx;
 
 	Assert(slab);
 
@@ -352,8 +432,8 @@ SlabAlloc(MemoryContext context, Size size, int flags)
 
-	Assert((slab->minFreeChunks >= 0) &&
-		   (slab->minFreeChunks < slab->chunksPerBlock));
+	Assert((slab->minFreeChunksIndex >= 0) &&
+		   (slab->minFreeChunksIndex < SLAB_FREELIST_COUNT));
 
 	/* make sure we only allow correct request size */
 	if (size != slab->chunkSize)
@@ -367,25 +447,33 @@ SlabAlloc(MemoryContext context, Size size, int flags)
 	 * slab->minFreeChunks == 0 means there are no blocks with free chunks,
 	 * thanks to how minFreeChunks is updated at the end of SlabAlloc().
	 */
-	if (slab->minFreeChunks == 0)
+	if (unlikely(slab->minFreeChunksIndex == 0))
 	{
-		block = (SlabBlock *) malloc(slab->blockSize);
+		if (slab->nfreeblocks > 0)
+		{
+			dlist_node *node;
 
-		if (block == NULL)
-			return NULL;
+			node = dlist_pop_head_node(&slab->freeblocks);
+			block = dlist_container(SlabBlock, node, node);
+			slab->nfreeblocks--;
+		}
+		else
+		{
+			block = (SlabBlock *) malloc(slab->blockSize);
+
+			if (unlikely(block == NULL))
+				return NULL;
+
+			slab->nblocks += 1;
+			context->mem_allocated += slab->blockSize;
+		}
 
 		block->nfree = slab->chunksPerBlock;
-		block->firstFreeChunk = 0;
+		block->firstFreeChunk = NULL;
+		block->nunused = slab->chunksPerBlock;
+		block->unused = (SlabChunk *) SlabBlockStart(block);
 
-		/*
-		 * Put all the chunks on a freelist. Walk the chunks and point each
-		 * one to the next one.
-		 */
-		for (idx = 0; idx < slab->chunksPerBlock; idx++)
-		{
-			chunk = SlabBlockGetChunk(slab, block, idx);
-			*(int32 *) SlabChunkGetPointer(chunk) = (idx + 1);
-		}
+		slab->minFreeChunksIndex = SlabFreelistIndex(slab, slab->chunksPerBlock);
 
 		/*
 		 * And add it to the last freelist with all chunks empty.
@@ -393,32 +481,54 @@ SlabAlloc(MemoryContext context, Size size, int flags)
 		 * We know there are no blocks in the freelist, otherwise we wouldn't
 		 * need a new block.
 		 */
-		Assert(dlist_is_empty(&slab->freelist[slab->chunksPerBlock]));
+		Assert(dlist_is_empty(SlabFreelist(slab, slab->chunksPerBlock)));
 
-		dlist_push_head(&slab->freelist[slab->chunksPerBlock], &block->node);
+		dlist_push_head(SlabFreelist(slab, slab->chunksPerBlock),
+						&block->node);
 
-		slab->minFreeChunks = slab->chunksPerBlock;
-		slab->nblocks += 1;
-		context->mem_allocated += slab->blockSize;
+		chunk = block->unused;
+		block->unused = (SlabChunk *) (((char *) block->unused) + slab->fullChunkSize);
+		block->nunused--;
+	}
+	else
+	{
+		/* grab the block from the freelist */
+		block = dlist_head_element(SlabBlock, node,
+								   &slab->freelist[slab->minFreeChunksIndex]);
+
+		/* make sure we actually got a valid block, with matching nfree */
+		Assert(block != NULL);
+		Assert(slab->minFreeChunksIndex == SlabFreelistIndex(slab, block->nfree));
+		Assert(block->nfree > 0);
+
+		/* first consume never-allocated space, then the block's freelist */
+		if (block->nunused > 0)
+		{
+			chunk = block->unused;
+			block->unused = (SlabChunk *) (((char *) block->unused) + slab->fullChunkSize);
+			block->nunused--;
+		}
+		else
+		{
+			chunk = block->firstFreeChunk;
+
+			/*
+			 * Remove the chunk from the freelist head. A pointer to the next
+			 * free chunk is stored in the chunk itself.
+			 */
+			VALGRIND_MAKE_MEM_DEFINED(SlabChunkGetPointer(chunk), sizeof(void *));
+			block->firstFreeChunk = *(SlabChunk **) SlabChunkGetPointer(chunk);
+		}
 	}
 
-	/* grab the block from the freelist (even the new block is there) */
+	/* make sure the chunk is in the block and that it's marked as empty (XXX?)
*/ + Assert((char *) chunk >= SlabBlockStart(block)); + Assert((char *) chunk < (((char *)block) + slab->blockSize)); - /* make sure we actually got a valid block, with matching nfree */ - Assert(block != NULL); - Assert(slab->minFreeChunks == block->nfree); - Assert(block->nfree > 0); - - /* we know index of the first free chunk in the block */ - idx = block->firstFreeChunk; - - /* make sure the chunk index is valid, and that it's marked as empty */ - Assert((idx >= 0) && (idx < slab->chunksPerBlock)); - - /* compute the chunk location block start (after the block header) */ - chunk = SlabBlockGetChunk(slab, block, idx); + Assert(block->firstFreeChunk == NULL || ( + (block->firstFreeChunk >= (SlabChunk *) SlabBlockStart(block)) && + block->firstFreeChunk <= (SlabChunk *) (((char *)block) + slab->blockSize)) + ); /* * Update the block nfree count, and also the minFreeChunks as we've @@ -426,48 +536,6 @@ SlabAlloc(MemoryContext context, Size size, int flags) * (because that's how we chose the block). */ block->nfree--; - slab->minFreeChunks = block->nfree; - - /* - * Remove the chunk from the freelist head. The index of the next free - * chunk is stored in the chunk itself. - */ - VALGRIND_MAKE_MEM_DEFINED(SlabChunkGetPointer(chunk), sizeof(int32)); - block->firstFreeChunk = *(int32 *) SlabChunkGetPointer(chunk); - - Assert(block->firstFreeChunk >= 0); - Assert(block->firstFreeChunk <= slab->chunksPerBlock); - - Assert((block->nfree != 0 && - block->firstFreeChunk < slab->chunksPerBlock) || - (block->nfree == 0 && - block->firstFreeChunk == slab->chunksPerBlock)); - - /* move the whole block to the right place in the freelist */ - dlist_delete(&block->node); - dlist_push_head(&slab->freelist[block->nfree], &block->node); - - /* - * And finally update minFreeChunks, i.e. the index to the block with the - * lowest number of free chunks. We only need to do that when the block - * got full (otherwise we know the current block is the right one). We'll - * simply walk the freelist until we find a non-empty entry. - */ - if (slab->minFreeChunks == 0) - { - for (idx = 1; idx <= slab->chunksPerBlock; idx++) - { - if (dlist_is_empty(&slab->freelist[idx])) - continue; - - /* found a non-empty freelist */ - slab->minFreeChunks = idx; - break; - } - } - - if (slab->minFreeChunks == slab->chunksPerBlock) - slab->minFreeChunks = 0; /* Prepare to initialize the chunk header. */ VALGRIND_MAKE_MEM_UNDEFINED(chunk, sizeof(SlabChunk)); @@ -475,6 +543,45 @@ SlabAlloc(MemoryContext context, Size size, int flags) chunk->block = block; chunk->slab = slab; + Assert(slab->minFreeChunksIndex == SlabFreelistIndex(slab, block->nfree + 1)); + + /* move the whole block to the right place in the freelist */ + if (unlikely(SlabFreelistIndex(slab, block->nfree) != slab->minFreeChunksIndex)) + { + slab->minFreeChunksIndex = SlabFreelistIndex(slab, block->nfree); + dlist_delete(&block->node); + dlist_push_head(SlabFreelist(slab, block->nfree), &block->node); + + + /* + * And finally update minFreeChunks, i.e. the index to the block with the + * lowest number of free chunks. We only need to do that when the block + * got full (otherwise we know the current block is the right one). We'll + * simply walk the freelist until we find a non-empty entry. 
+ */ + if (slab->minFreeChunksIndex == 0) + { + for (int idx = 1; idx < SLAB_FREELIST_COUNT; idx++) + { + if (dlist_is_empty(&slab->freelist[idx])) + continue; + + /* found a non-empty freelist */ + slab->minFreeChunksIndex = idx; + break; + } + } + } + +#if 0 + /* + * FIXME: I don't understand what this ever did? It should be unreachable + * I think? + */ + if (slab->minFreeChunks == slab->chunksPerBlock) + slab->minFreeChunks = 0; +#endif + #ifdef MEMORY_CONTEXT_CHECKING /* slab mark to catch clobber of "unused" space */ if (slab->chunkSize < (slab->fullChunkSize - sizeof(SlabChunk))) @@ -496,6 +603,61 @@ SlabAlloc(MemoryContext context, Size size, int flags) return SlabChunkGetPointer(chunk); } +static void pg_noinline +SlabFreeSlow(SlabContext *slab, SlabBlock *block) +{ + dlist_delete(&block->node); + + /* + * See if we need to update the minFreeChunks field for the slab - we only + * need to do that if the block had that number of free chunks + * before we freed one. In that case, we check if there still are blocks + * in the original freelist and we either keep the current value (if there + * still are blocks) or increment it by one (the new block is still the + * one with minimum free chunks). + * + * The one exception is when the block will get completely free - in that + * case we will free it, se we can't use it for minFreeChunks. It however + * means there are no more blocks with free chunks. + */ + if (slab->minFreeChunksIndex == SlabFreelistIndex(slab, block->nfree - 1)) + { + /* Have we removed the last chunk from the freelist? */ + if (dlist_is_empty(&slab->freelist[slab->minFreeChunksIndex])) + { + /* but if we made the block entirely free, we'll free it */ + if (block->nfree == slab->chunksPerBlock) + slab->minFreeChunksIndex = 0; + else + slab->minFreeChunksIndex++; + } + } + + /* If the block is now completely empty, free it (in a way). */ + if (block->nfree == slab->chunksPerBlock) + { + /* + * To avoid constantly freeing/allocating blocks in bursty patterns + * (on most crucially cases of repeatedly allocating and freeing a + * single chunk), retain a small number of blocks. + */ + if (slab->nfreeblocks < SLAB_RETAIN_EMPTY_BLOCK_COUNT) + { + dlist_push_head(&slab->freeblocks, &block->node); + slab->nfreeblocks++; + } + else + { + slab->nblocks--; + slab->header.mem_allocated -= slab->blockSize; + free(block); + } + } + else + dlist_push_head(SlabFreelist(slab, block->nfree), &block->node); + +} + /* * SlabFree * Frees allocated memory; memory is removed from the slab. 
@@ -503,7 +665,6 @@ SlabAlloc(MemoryContext context, Size size, int flags) static void SlabFree(MemoryContext context, void *pointer) { - int idx; SlabContext *slab = castNode(SlabContext, context); SlabChunk *chunk = SlabPointerGetChunk(pointer); SlabBlock *block = chunk->block; @@ -516,61 +677,26 @@ SlabFree(MemoryContext context, void *pointer) slab->header.name, chunk); #endif - /* compute index of the chunk with respect to block start */ - idx = SlabChunkIndex(slab, block, chunk); - /* add chunk to freelist, and update block nfree count */ - *(int32 *) pointer = block->firstFreeChunk; - block->firstFreeChunk = idx; + *(SlabChunk **) pointer = block->firstFreeChunk; + block->firstFreeChunk = chunk; block->nfree++; Assert(block->nfree > 0); Assert(block->nfree <= slab->chunksPerBlock); #ifdef CLOBBER_FREED_MEMORY - /* XXX don't wipe the int32 index, used for block-level freelist */ - wipe_mem((char *) pointer + sizeof(int32), - slab->chunkSize - sizeof(int32)); + /* XXX don't wipe the SlabChunk* index, used for block-level freelist */ + wipe_mem((char *) pointer + sizeof(SlabChunk*), + slab->chunkSize - sizeof(SlabChunk*)); #endif /* remove the block from a freelist */ - dlist_delete(&block->node); - - /* - * See if we need to update the minFreeChunks field for the slab - we only - * need to do that if there the block had that number of free chunks - * before we freed one. In that case, we check if there still are blocks - * in the original freelist and we either keep the current value (if there - * still are blocks) or increment it by one (the new block is still the - * one with minimum free chunks). - * - * The one exception is when the block will get completely free - in that - * case we will free it, se we can't use it for minFreeChunks. It however - * means there are no more blocks with free chunks. - */ - if (slab->minFreeChunks == (block->nfree - 1)) + if (SlabFreelistIndex(slab, block->nfree) != SlabFreelistIndex(slab, block->nfree - 1)) { - /* Have we removed the last chunk from the freelist? */ - if (dlist_is_empty(&slab->freelist[slab->minFreeChunks])) - { - /* but if we made the block entirely free, we'll free it */ - if (block->nfree == slab->chunksPerBlock) - slab->minFreeChunks = 0; - else - slab->minFreeChunks++; - } + SlabFreeSlow(slab, block); } - /* If the block is now completely empty, free it. */ - if (block->nfree == slab->chunksPerBlock) - { - free(block); - slab->nblocks--; - context->mem_allocated -= slab->blockSize; - } - else - dlist_push_head(&slab->freelist[block->nfree], &block->node); - Assert(slab->nblocks >= 0); Assert(slab->nblocks * slab->blockSize == context->mem_allocated); } @@ -657,7 +783,7 @@ SlabStats(MemoryContext context, /* Include context header in totalspace */ totalspace = slab->headerSize; - for (i = 0; i <= slab->chunksPerBlock; i++) + for (i = 0; i < SLAB_FREELIST_COUNT; i++) { dlist_iter iter; @@ -714,7 +840,7 @@ SlabCheck(MemoryContext context) Assert(slab->chunksPerBlock > 0); /* walk all the freelists */ - for (i = 0; i <= slab->chunksPerBlock; i++) + for (i = 0; i < SLAB_FREELIST_COUNT; i++) { int j, nfree; @@ -723,20 +849,19 @@ SlabCheck(MemoryContext context) /* walk all blocks on this freelist */ dlist_foreach(iter, &slab->freelist[i]) { - int idx; SlabBlock *block = dlist_container(SlabBlock, node, iter.cur); + SlabChunk *cur_chunk; /* * Make sure the number of free chunks (in the block header) * matches position in the freelist. 
*/ - if (block->nfree != i) + if (SlabFreelistIndex(slab, block->nfree) != i) elog(WARNING, "problem in slab %s: number of free chunks %d in block %p does not match freelist %d", name, block->nfree, block, i); /* reset the bitmap of free chunks for this block */ memset(slab->freechunks, 0, (slab->chunksPerBlock * sizeof(bool))); - idx = block->firstFreeChunk; /* * Now walk through the chunks, count the free ones and also @@ -744,20 +869,31 @@ SlabCheck(MemoryContext context) * freelist is stored within the chunks themselves, we have to * walk through the chunks and construct our own bitmap. */ - + cur_chunk = block->firstFreeChunk; nfree = 0; - while (idx < slab->chunksPerBlock) + while (cur_chunk != NULL) { - SlabChunk *chunk; + int idx = SlabChunkIndex(slab, block, cur_chunk); /* count the chunk as free, add it to the bitmap */ nfree++; slab->freechunks[idx] = true; /* read index of the next free chunk */ - chunk = SlabBlockGetChunk(slab, block, idx); - VALGRIND_MAKE_MEM_DEFINED(SlabChunkGetPointer(chunk), sizeof(int32)); - idx = *(int32 *) SlabChunkGetPointer(chunk); + VALGRIND_MAKE_MEM_DEFINED(SlabChunkGetPointer(cur_chunk), sizeof(SlabChunk **)); + cur_chunk = *(SlabChunk **) SlabChunkGetPointer(cur_chunk); + } + + cur_chunk = block->unused; + for (j = 0; j < block->nunused; j++) + { + int idx = SlabChunkIndex(slab, block, cur_chunk); + + /* count the chunk as free, add it to the bitmap */ + nfree++; + slab->freechunks[idx] = true; + + cur_chunk = (SlabChunk *)(((char *) cur_chunk) + slab->fullChunkSize); } for (j = 0; j < slab->chunksPerBlock; j++) -- 2.32.0.rc2
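For anyone who wants to sanity-check the freelist bucketing in 0002
outside the backend, here is a standalone program using the same
expressions as SlabContextCreate() and SlabFreelistIndex() (the
chunksPerBlock value is just an example):

#include <assert.h>
#include <stdio.h>

#define SLAB_FREELIST_COUNT 9

int
main(void)
{
	int			chunksPerBlock = 64;
	int			freelist_shift = 0;

	/* same loop as in SlabContextCreate() */
	while ((chunksPerBlock >> freelist_shift) >= (SLAB_FREELIST_COUNT - 1))
		freelist_shift++;

	for (int freecount = 0; freecount <= chunksPerBlock; freecount++)
	{
		/* same expression as in SlabFreelistIndex() */
		int			index = (freecount + (1 << freelist_shift) - 1) >> freelist_shift;

		/* index 0 is reserved for full blocks, everything else fits */
		assert(freecount == 0 ? index == 0 :
			   (index > 0 && index < SLAB_FREELIST_COUNT));
		printf("freecount %2d -> freelist %d\n", freecount, index);
	}

	return 0;
}

With 64 chunks per block this yields freelist_shift = 4, so counts 1-16
map to bucket 1, 17-32 to bucket 2, and so on; a full block (freecount
0) is the only thing that lands in bucket 0, which is what the
branch-free rounding expression is meant to guarantee.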