https://gcc.gnu.org/bugzilla/show_bug.cgi?id=97387

--- Comment #13 from fdlbxtqi <euloanty at live dot com> ---
https://godbolt.org/z/fqGrz1

After this patch, the generated assembly is much better. However, it still
contains several missed optimizations.

The problem shows up in code like this.

Let's walk through the assembly and look at the problems.

// Limb-wise subtraction of two field_number values: returns x - y, followed by
// two conditional correction passes driven by the outgoing borrow.
//
// NOTE(review): assumes sub_borrow(b, a, c, out) stores a - c - b into out and
// returns the resulting borrow; this matches the sub/sbb chain in the compiler
// output quoted below, but confirm against the intrinsics header.
// NOTE(review): 38 is presumably 2^256 mod (2^255 - 19), i.e. the amount to
// subtract when a four-limb (4 x 64-bit) value wraps past 2^256 -- confirm.
field_number operator-(field_number const& x,field_number const& y) noexcept
{
    using namespace intrinsics;
    using unsigned_type = field_number::value_type;
    constexpr unsigned_type zero{};
    field_number f;

    // f = x - y across all four limbs, rippling the borrow upwards.
    bool borrow{sub_borrow(false,x[0],y[0],f[0])};
    borrow=sub_borrow(borrow,x[1],y[1],f[1]);
    borrow=sub_borrow(borrow,x[2],y[2],f[2]);
    borrow=sub_borrow(borrow,x[3],y[3],f[3]);

    // v - v - borrow: turns the borrow flag into a mask (0 or all ones)
    // without branching, then narrows it to 0 or 38.
    unsigned_type v{};
    sub_borrow(borrow,v,v,v);
    v&=static_cast<unsigned_type>(38);

    // First correction pass: subtract v from the low limb and propagate.
    borrow=sub_borrow(false,f[0],v,f[0]);
    borrow=sub_borrow(borrow,f[1],zero,f[1]);
    borrow=sub_borrow(borrow,f[2],zero,f[2]);
    borrow=sub_borrow(borrow,f[3],zero,f[3]);

    // The correction itself may borrow; rebuild the mask and apply it once more.
    sub_borrow(borrow,v,v,v);
    v&=static_cast<unsigned_type>(38);
    borrow=sub_borrow(false,f[0],v,f[0]);
    borrow=sub_borrow(borrow,f[1],zero,f[1]);
    borrow=sub_borrow(borrow,f[2],zero,f[2]);
    // Fixed: the minuend here was f[2], which overwrote the top limb with a
    // value derived from limb 2. The assembly quoted after this function was
    // generated from that earlier version, so its last sbbq still reads and
    // stores %rdx. The final borrow is not needed, so it is discarded.
    sub_borrow(borrow,f[3],zero,f[3]);
    return f;
}


_ZN7fast_io10curve25519miERKNS0_12field_numberES3_:
.LFB5431:
        .cfi_startproc
        .cfi_personality 0x3,__gxx_personality_v0
        movq    %rsi, %rcx
        movq    %rdx, %rax
        movq    %rdi, %r8
        movq    (%rsi), %rdi
        movq    24(%rcx), %r9
        subq    (%rdx), %rdi
        movq    8(%rsi), %rsi
        sbbq    8(%rdx), %rsi
        movq    16(%rcx), %rdx
        sbbq    16(%rax), %rdx
        movq    24(%rax), %rax
        sbbq    %rax, %r9

        movl    $0, %eax
        movq    %rax, %rcx
// The value of the register passed to a self-sbbq does not matter. It should be
// sbbq %r9, %r9 and you are done.

        sbbq    %rax, %rcx
        andl    $38, %ecx
        subq    %rcx, %rdi

// The second problem is these uses of %rax. They should just be an immediate 0:
// sbbq $0,%rsi
        sbbq    %rax, %rsi
        sbbq    %rax, %rdx
        sbbq    %rax, %r9
        sbbq    %rcx, %rcx
        andl    $38, %ecx
        subq    %rcx, %rdi
        sbbq    %rax, %rsi
        movq    %rdi, -40(%rsp)
        sbbq    %rax, %rdx
        movq    %rsi, -32(%rsp)
        movdqa  -40(%rsp), %xmm0
        movq    %rdx, -24(%rsp)
        sbbq    %rax, %rdx
        movq    %r8, %rax
        movq    %rdx, -16(%rsp)
        movdqa  -24(%rsp), %xmm1
        movups  %xmm0, (%r8)
        movups  %xmm1, 16(%r8)
        ret

https://godbolt.org/z/nKPWx3

Reply via email to