[ whoops, message too big, resending with the attachment compressed ] On Tue, Sep 27, 2016 at 03:14:51PM -0600, Jeff Law wrote: > With transposition issue addressed, the only blocker I see are some > simple testcases we can add to the suite. They don't have to be real > extensive. And one motivating example for the list archives, ideally > the glibc malloc case.
And here is the malloc testcase. A very important (for performance) function is _int_malloc, which starts with static void * _int_malloc (mstate av, size_t bytes) { // [ variable declarations culled ] if (((unsigned long) (bytes) >= (unsigned long) (size_t) (-2 * (unsigned long)(((( __builtin_offsetof ( struct malloc_chunk , fd_nextsize ) )+((2 *(sizeof(size_t)) < __alignof__ (long double) ? __alignof__ (long double) : 2 *(sizeof(size_t))) - 1)) & ~((2 *(sizeof(size_t)) < __alignof__ (long double) ? __alignof__ (long double) : 2 *(sizeof(size_t))) - 1)))))) { (__libc_errno = ( 12 )); return 0; } if (__builtin_expect ((av == (( void *)0) ), 0)) { void *p = sysmalloc (nb, av); if (p != (( void *)0) ) alloc_perturb (p, bytes); return p; } which without separate shrink-wrapping ends up as (reordered the blocks): .L._int_malloc: mflr 0 li 9,-65 std 14,-144(1) std 15,-136(1) cmpld 7,4,9 std 16,-128(1) std 17,-120(1) std 18,-112(1) std 19,-104(1) std 20,-96(1) std 21,-88(1) std 22,-80(1) std 23,-72(1) std 0,16(1) std 24,-64(1) std 25,-56(1) std 26,-48(1) std 27,-40(1) std 28,-32(1) std 29,-24(1) std 30,-16(1) std 31,-8(1) stdu 1,-288(1) bgt 7,.L768 addi 14,4,23 mr 15,3 cmpldi 0,14,31 ble 0,.L769 # ... .L768: addis 27,2,__libc_errno@got@tprel@ha li 19,12 ld 28,__libc_errno@got@tprel@l(27) li 3,0 add 17,28,__libc_errno@tls stw 19,0(17) b .L631 # ... .L631: addi 1,1,288 ld 29,16(1) ld 14,-144(1) ld 15,-136(1) ld 16,-128(1) ld 17,-120(1) ld 18,-112(1) ld 19,-104(1) ld 20,-96(1) ld 21,-88(1) ld 22,-80(1) ld 23,-72(1) ld 24,-64(1) mtlr 29 ld 25,-56(1) ld 26,-48(1) ld 27,-40(1) ld 28,-32(1) ld 29,-24(1) ld 30,-16(1) ld 31,-8(1) blr # ... .L769: cmpdi 1,3,0 beq 1,.L715 # ... 
.L715: li 14,32 .L635: li 4,0 .L762: addi 1,1,288 mr 3,14 ld 14,16(1) ld 15,-136(1) ld 16,-128(1) ld 17,-120(1) ld 18,-112(1) ld 19,-104(1) ld 20,-96(1) ld 21,-88(1) ld 22,-80(1) ld 23,-72(1) ld 24,-64(1) ld 25,-56(1) mtlr 14 ld 26,-48(1) ld 14,-144(1) ld 27,-40(1) ld 28,-32(1) ld 29,-24(1) ld 30,-16(1) ld 31,-8(1) b sysmalloc [ I see I have regrename on by default still; doesn't matter much for this test, it's just less readable ] With separate shrink-wrapping we get instead .L._int_malloc: li 9,-65 stdu 1,-288(1) cmpld 7,4,9 bgt 7,.L811 std 14,144(1) addi 14,4,23 std 15,152(1) mr 15,3 cmpldi 0,14,31 std 25,232(1) std 30,272(1) ble 0,.L812 # ... .L811: addis 6,2,__libc_errno@got@tprel@ha li 11,12 ld 10,__libc_errno@got@tprel@l(6) li 3,0 add 12,10,__libc_errno@tls stw 11,0(12) b .L673 # ... .L673: addi 1,1,288 blr # ... .L812: cmpdi 1,3,0 beq 1,.L757 # ... .L757: li 14,32 .L677: mr 3,14 ld 15,152(1) ld 14,144(1) ld 25,232(1) ld 30,272(1) li 4,0 addi 1,1,288 b sysmalloc I'm attaching the full testcase (pre-processed for powerpc64-linux). Segher
malloc.i.gz
Description: GNU Zip compressed data