On Tue, 20 Jul 2021 at 19:04, Andres Freund <[email protected]> wrote:
> > * AllocateSetAlloc.txt
> > * palloc.txt
> > * percent.txt
>
> Huh, that's interesting. You have some control flow enforcement stuff turned
> on (the endbr64). And it looks like it has a non zero cost (or maybe it's
> just skid). Did you enable that intentionally? If not, what
> compiler/version/distro is it? I think at least on GCC that's
> -fcf-protection=...
It's Ubuntu 21.04 with gcc 10.3 (specifically gcc version 10.3.0
(Ubuntu 10.3.0-1ubuntu1)).
I've attached the same results from compiling with clang 12
(12.0.0-3ubuntu1~21.04.1).
David
Percent | Source code & Disassembly of postgres for cycles (707 samples, percent: local period)
-----------------------------------------------------------------------------------------------------
:
:
:
: Disassembly of section .text:
:
: 00000000008e7c10 <AllocSetAlloc>:
: AllocSetAlloc():
:
: /*
: * If requested size exceeds maximum for chunks, allocate an entire block
: * for this request.
: */
: if (unlikely(size > set->allocChunkLimit))
7.48 : 8e7c10: cmp %rsi,0xc8(%rdi)
3.26 : 8e7c17: jb 8e7c81 <AllocSetAlloc+0x71>
0.00 : 8e7c19: xor %eax,%eax
: AllocSetFreeIndex():
: if (size > (1 << ALLOC_MINBITS))
0.44 : 8e7c1b: cmp $0x9,%rsi
0.00 : 8e7c1f: jb 8e7c2d <AllocSetAlloc+0x1d>
: idx = 31 - __builtin_clz((uint32) size - 1) - ALLOC_MINBITS + 1;
0.00 : 8e7c21: add $0xffffffff,%esi
0.98 : 8e7c24: bsr %esi,%eax
9.59 : 8e7c27: xor $0xffffffe0,%eax
1.44 : 8e7c2a: add $0x1e,%eax
: AllocSetAlloc():
: * corresponding free list to see if there is a free chunk we could reuse.
: * If one is found, remove it from the free list, make it again a member
: * of the alloc set and return its data address.
: */
: fidx = AllocSetFreeIndex(size);
: chunk = set->freelist[fidx];
1.67 : 8e7c2d: movslq %eax,%rcx
4.10 : 8e7c30: mov 0x58(%rdi,%rcx,8),%rax
: if (chunk != NULL)
15.97 : 8e7c35: test %rax,%rax
0.28 : 8e7c38: je 8e7c45 <AllocSetAlloc+0x35>
: {
: Assert(chunk->size >= size);
:
: set->freelist[fidx] = (AllocChunk) chunk->aset;
0.00 : 8e7c3a: mov 0x8(%rax),%rdx
13.33 : 8e7c3e: mov %rdx,0x58(%rdi,%rcx,8)
0.28 : 8e7c43: jmp 8e7c73 <AllocSetAlloc+0x63>
0.00 : 8e7c45: mov $0x8,%eax
: }
:
: /*
: * Choose the actual chunk size to allocate.
: */
: chunk_size = (1 << ALLOC_MINBITS) << fidx;
0.71 : 8e7c4a: shl %cl,%eax
0.15 : 8e7c4c: movslq %eax,%rsi
:
: /*
: * If there is enough room in the active allocation block, we will put the
: * chunk into that block. Else must start a new one.
: */
: if ((block = set->blocks) != NULL)
0.43 : 8e7c4f: mov 0x50(%rdi),%rdx
1.13 : 8e7c53: test %rdx,%rdx
0.14 : 8e7c56: je 8e7c7c <AllocSetAlloc+0x6c>
: {
: Size availspace = block->endptr - block->freeptr;
0.00 : 8e7c58: mov 0x18(%rdx),%rax
6.98 : 8e7c5c: mov 0x20(%rdx),%rcx
2.30 : 8e7c60: sub %rax,%rcx
:
: if (unlikely(availspace < (chunk_size + ALLOC_CHUNKHDRSZ)))
0.00 : 8e7c63: lea 0x10(%rsi),%r8
0.14 : 8e7c67: cmp %r8,%rcx
2.02 : 8e7c6a: jb 8e7c86 <AllocSetAlloc+0x76>
: chunk = (AllocChunk) (block->freeptr);
:
: /* Prepare to initialize the chunk header. */
: VALGRIND_MAKE_MEM_UNDEFINED(chunk, ALLOC_CHUNKHDRSZ);
:
: chunk->size = chunk_size;
2.04 : 8e7c6c: mov %rsi,(%rax)
:
: block->freeptr += (chunk_size + ALLOC_CHUNKHDRSZ);
20.16 : 8e7c6f: add %r8,0x18(%rdx)
0.28 : 8e7c73: mov %rdi,0x8(%rax)
4.70 : 8e7c77: add $0x10,%rax
: Assert(block->freeptr <= block->endptr);
:
: return AllocSetAllocReturnChunk(set, size, chunk, chunk_size);
: }
0.00 : 8e7c7b: ret
: return AllocSetAllocFromNewBlock(set, size, chunk_size);
0.00 : 8e7c7c: jmp 8e8470 <AllocSetAllocFromNewBlock>
: return AllocSetAllocLarge(set, size, flags);
0.00 : 8e7c81: jmp 8e8330 <AllocSetAllocLarge>
: return AllocSetAllocCarveOldAndAlloc(set, size, chunk_size,
0.00 : 8e7c86: jmp 8e83e0 <AllocSetAllocCarveOldAndAlloc>
Percent | Source code & Disassembly of postgres for cycles (123 samples, percent: local period)
-----------------------------------------------------------------------------------------------------
:
:
:
: Disassembly of section .text:
:
: 00000000008ee3a0 <palloc>:
: palloc():
: MemoryContextStatsDetail(TopMemoryContext, 100, false);
: }
:
: void *
: palloc(Size size)
: {
9.12 : 8ee3a0: mov %rdi,%rsi
: /* duplicates MemoryContextAlloc to avoid increased overhead */
: void *ret;
: MemoryContext context = CurrentMemoryContext;
2.47 : 8ee3a3: mov 0x295a86(%rip),%rdi # b83e30 <CurrentMemoryContext>
:
: AssertArg(MemoryContextIsValid(context));
: AssertNotInCriticalSection(context);
: context->isReset = false;
22.83 : 8ee3aa: movb $0x0,0x4(%rdi)
:
: ret = context->methods->alloc(context, size, 0);
34.25 : 8ee3ae: mov 0x10(%rdi),%rax
7.68 : 8ee3b2: mov (%rax),%rax
23.66 : 8ee3b5: xor %edx,%edx
0.00 : 8ee3b7: jmp *%rax
2.27% postgres postgres [.] AllocSetAlloc
0.59% postgres postgres [.] pfree
0.44% postgres postgres [.] MemoryContextAllocZero
0.39% postgres postgres [.] palloc
0.34% postgres postgres [.] MemoryContextAllocZeroAligned
0.27% postgres postgres [.] palloc0
0.17% postgres postgres [.] AllocSetAllocCarveOldAndAlloc
0.09% postgres postgres [.] MemoryContextAlloc
0.02% postgres postgres [.] AllocSetAllocLarge
0.01% postgres postgres [.] AllocSetAllocFromNewBlock