https://gcc.gnu.org/bugzilla/show_bug.cgi?id=97387

--- Comment #14 from fdlbxtqi <euloanty at live dot com> ---
(In reply to fdlbxtqi from comment #13)
> https://godbolt.org/z/fqGrz1
> 
> After this patch, the assembly generated is much better now. However, it
> still contains many optimization problems.
> 
> The problematic code looks like this.
> 
> Let's just walk through the assembly and see the problems here.
> 
> field_number operator-(field_number const& x,field_number const& y) noexcept
> {
>       using namespace intrinsics;
>       using unsigned_type = field_number::value_type;
>       constexpr unsigned_type zero{};
>     field_number f;
>     bool borrow{sub_borrow(false,x[0],y[0],f[0])};
>     borrow=sub_borrow(borrow,x[1],y[1],f[1]);
>     borrow=sub_borrow(borrow,x[2],y[2],f[2]);
>     borrow=sub_borrow(borrow,x[3],y[3],f[3]);
>     unsigned_type v{};
>     sub_borrow(borrow,v,v,v);
>     v&=static_cast<unsigned_type>(38);
>     borrow=sub_borrow(false,f[0],v,f[0]);
>     borrow=sub_borrow(borrow,f[1],zero,f[1]);
>     borrow=sub_borrow(borrow,f[2],zero,f[2]);
>     borrow=sub_borrow(borrow,f[3],zero,f[3]);
>     sub_borrow(borrow,v,v,v);
>     v&=static_cast<unsigned_type>(38);
>     borrow=sub_borrow(false,f[0],v,f[0]);
>     borrow=sub_borrow(borrow,f[1],zero,f[1]);
>     borrow=sub_borrow(borrow,f[2],zero,f[2]);
>     borrow=sub_borrow(borrow,f[2],zero,f[3]);
>     return f;
> }
> 
> 
> _ZN7fast_io10curve25519miERKNS0_12field_numberES3_:
> .LFB5431:
>       .cfi_startproc
>       .cfi_personality 0x3,__gxx_personality_v0
>       movq    %rsi, %rcx
>       movq    %rdx, %rax
>       movq    %rdi, %r8
>       movq    (%rsi), %rdi
>       movq    24(%rcx), %r9
>       subq    (%rdx), %rdi
>       movq    8(%rsi), %rsi
>       sbbq    8(%rdx), %rsi
>       movq    16(%rcx), %rdx
>       sbbq    16(%rax), %rdx
>       movq    24(%rax), %rax
>       sbbq    %rax, %r9
> 
>       movl    $0, %eax
>       movq    %rax, %rcx
> // The value of the register that sbbq subtracts from itself does not matter.
> // It should be sbbq %r9, %r9 and you are done.
> 
>       sbbq    %rax, %rcx
>       andl    $38, %ecx
>       subq    %rcx, %rdi
> 
> // The 2nd problem is these %rax operands. They should just be an immediate 0:
> // sbbq $0,%rsi
>       sbbq    %rax, %rsi
>       sbbq    %rax, %rdx
>       sbbq    %rax, %r9
>       sbbq    %rcx, %rcx
>       andl    $38, %ecx
>       subq    %rcx, %rdi
>       sbbq    %rax, %rsi
>       movq    %rdi, -40(%rsp)
>       sbbq    %rax, %rdx
>       movq    %rsi, -32(%rsp)
>       movdqa  -40(%rsp), %xmm0
>       movq    %rdx, -24(%rsp)
>       sbbq    %rax, %rdx
>       movq    %r8, %rax
>       movq    %rdx, -16(%rsp)
>       movdqa  -24(%rsp), %xmm1
>       movups  %xmm0, (%r8)
>       movups  %xmm1, 16(%r8)
>       ret
> 
> https://godbolt.org/z/nKPWx3

I think the optimal number of instructions should be 26.

Reply via email to