On 08/15/2016 05:46 AM, Florian Weimer wrote:
> On 08/14/2016 08:23 AM, Daniel Santos wrote:
>> ms_abi_push_regs:
>>         pop     %rax
>>         push    %rdi
>>         push    %rsi
>>         sub     $0xa8,%rsp
>>         movaps  %xmm6,(%rsp)
>>         movaps  %xmm7,0x10(%rsp)
>>         movaps  %xmm8,0x20(%rsp)
>>         movaps  %xmm9,0x30(%rsp)
>>         movaps  %xmm10,0x40(%rsp)
>>         movaps  %xmm11,0x50(%rsp)
>>         movaps  %xmm12,0x60(%rsp)
>>         movaps  %xmm13,0x70(%rsp)
>>         movaps  %xmm14,0x80(%rsp)
>>         movaps  %xmm15,0x90(%rsp)
>>         jmp     *(%rax)
>
> I think this will be quite slow because it breaks the return stack
> optimization in the CPU. I think you should push the return address
> and use RET.
>
> Florian
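
For context on that suggestion: every CALL pushes a prediction onto the
CPU's internal return-address stack, and only a matching RET pops it.
Ending the stub with an indirect jump through the saved return address
leaves that entry stranded, so later returns mispredict; re-pushing the
address and finishing with RET keeps the pairing balanced. A minimal
illustration with placeholder labels, not the actual stub code:

stub_with_jmp:                  /* the CALL into the stub is never paired with a RET */
        pop     %rax            /* return address pushed by the CALL */
        /* ... save registers ... */
        jmp     *%rax           /* indirect jump back to the caller */

stub_with_ret:                  /* CALL and RET stay paired */
        pop     %rax            /* return address pushed by the CALL */
        /* ... save registers ... */
        push    %rax            /* put the return address back */
        ret                     /* predicted correctly by the return stack */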
Looks like I forgot to reply-all on my last reply, but thanks again for
the advice here. Would there be any performance hit from reshuffling the
pushes and pops to save the 8-byte alignment padding? My assumption is
that the stack is always 16-byte aligned at a call, so at function entry
it holds the caller's 8-byte return address and is therefore offset by
8 bytes. (Also, I'm not sure that I need the .type directive; I was just
copying other code in libgcc. :)
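
For reference, the alignment accounting behind that question, assuming
the function is entered with %rsp offset by 8 (its caller's return
address on top) and calls the save stub as its very first instruction,
so that the stub itself is entered with %rsp back on a 16-byte boundary:

        /* %rsp is 0 (mod 16) on entry to the stub */
        pop     %rax            /* %rsp is 8 (mod 16) */
        push    %rdi            /* %rsp is 0 */
        sub     $0xa0,%rsp      /* 0xa0 = 10 * 16, so %rsp stays 0 */
        movaps  %xmm6,(%rsp)    /* 16-byte aligned, as movaps requires */
        /* ... %xmm7 through %xmm15 at 0x10(%rsp) through 0x90(%rsp) ... */
        push    %rsi            /* %rsp is 8 */
        push    %rax            /* %rsp is 0 */
        ret

With both %rdi and %rsi pushed before the sub, as in the earlier
version, %rsp would sit 8 bytes off a 16-byte boundary at that point, so
8 bytes of padding (0xa8 rather than 0xa0) would be needed to keep the
movaps stores aligned; splitting the pushes around the XMM block is what
saves that padding.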
        .text
        .global __msabi_save
        .hidden __msabi_save
#ifdef __ELF__
        .type   __msabi_save,@function
#endif
/* TODO: implement vmovaps when supported? */
__msabi_save:
#ifdef __x86_64__
        pop     %rax
        push    %rdi
        sub     $0xa0,%rsp
        movaps  %xmm6,(%rsp)
        movaps  %xmm7,0x10(%rsp)
        movaps  %xmm8,0x20(%rsp)
        movaps  %xmm9,0x30(%rsp)
        movaps  %xmm10,0x40(%rsp)
        movaps  %xmm11,0x50(%rsp)
        movaps  %xmm12,0x60(%rsp)
        movaps  %xmm13,0x70(%rsp)
        movaps  %xmm14,0x80(%rsp)
        movaps  %xmm15,0x90(%rsp)
        push    %rsi
        push    %rax
#endif /* __x86_64__ */
        ret

        .text
        .global __msabi_restore
        .hidden __msabi_restore
#ifdef __ELF__
        .type   __msabi_restore,@function
#endif
__msabi_restore:
#ifdef __x86_64__
        pop     %rsi
        movaps  (%rsp),%xmm6
        movaps  0x10(%rsp),%xmm7
        movaps  0x20(%rsp),%xmm8
        movaps  0x30(%rsp),%xmm9
        movaps  0x40(%rsp),%xmm10
        movaps  0x50(%rsp),%xmm11
        movaps  0x60(%rsp),%xmm12
        movaps  0x70(%rsp),%xmm13
        movaps  0x80(%rsp),%xmm14
        movaps  0x90(%rsp),%xmm15
        add     $0xa0,%rsp
        pop     %rdi
#endif /* __x86_64__ */
        ret
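
For reference, the way the two stubs appear intended to fit together,
given that __msabi_restore begins by popping the saved %rsi off the top
of the stack and ends with a bare ret: the prologue calls __msabi_save,
and the epilogue jumps (rather than calls) to __msabi_restore, whose
final ret then returns to the function's original caller. A hypothetical
usage sketch; the function name below is illustrative only:

some_msabi_fn:
        call    __msabi_save    /* leaves %rdi, %xmm6-%xmm15 and %rsi
                                   saved below the return address */
        /* ... function body; %rsp must be back at the post-save
           position before the jump below ... */
        jmp     __msabi_restore /* pops %rsi, reloads %xmm6-%xmm15, pops
                                   %rdi, then returns to some_msabi_fn's
                                   caller */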
Thanks!
Daniel