https://gcc.gnu.org/bugzilla/show_bug.cgi?id=97387
--- Comment #13 from fdlbxtqi <euloanty at live dot com> ---
https://godbolt.org/z/fqGrz1

After this patch, the generated assembly is much better. However, it still contains several optimization problems. The problems show up with code like this; let's walk through the assembly and look at them.

field_number operator-(field_number const& x,field_number const& y) noexcept
{
    using namespace intrinsics;
    using unsigned_type = field_number::value_type;
    constexpr unsigned_type zero{};
    field_number f;
    bool borrow{sub_borrow(false,x[0],y[0],f[0])};
    borrow=sub_borrow(borrow,x[1],y[1],f[1]);
    borrow=sub_borrow(borrow,x[2],y[2],f[2]);
    borrow=sub_borrow(borrow,x[3],y[3],f[3]);
    unsigned_type v{};
    sub_borrow(borrow,v,v,v);
    v&=static_cast<unsigned_type>(38);
    borrow=sub_borrow(false,f[0],v,f[0]);
    borrow=sub_borrow(borrow,f[1],zero,f[1]);
    borrow=sub_borrow(borrow,f[2],zero,f[2]);
    borrow=sub_borrow(borrow,f[3],zero,f[3]);
    sub_borrow(borrow,v,v,v);
    v&=static_cast<unsigned_type>(38);
    borrow=sub_borrow(false,f[0],v,f[0]);
    borrow=sub_borrow(borrow,f[1],zero,f[1]);
    borrow=sub_borrow(borrow,f[2],zero,f[2]);
    borrow=sub_borrow(borrow,f[3],zero,f[3]);
    return f;
}

_ZN7fast_io10curve25519miERKNS0_12field_numberES3_:
.LFB5431:
    .cfi_startproc
    .cfi_personality 0x3,__gxx_personality_v0
    movq    %rsi, %rcx
    movq    %rdx, %rax
    movq    %rdi, %r8
    movq    (%rsi), %rdi
    movq    24(%rcx), %r9
    subq    (%rdx), %rdi
    movq    8(%rsi), %rsi
    sbbq    8(%rdx), %rsi
    movq    16(%rcx), %rdx
    sbbq    16(%rax), %rdx
    movq    24(%rax), %rax
    sbbq    %rax, %r9
    movl    $0, %eax
    movq    %rax, %rcx
// First problem: the source value of an sbbq-to-itself does not matter, so the two zeroing moves above are unnecessary. It should just be sbbq %rcx, %rcx and you are done.
    sbbq    %rax, %rcx
    andl    $38, %ecx
    subq    %rcx, %rdi
// Second problem: these %rax operands. They should just be an immediate 0, i.e. sbbq $0, %rsi (see the combined sketch at the end of this comment).
    sbbq    %rax, %rsi
    sbbq    %rax, %rdx
    sbbq    %rax, %r9
    sbbq    %rcx, %rcx
    andl    $38, %ecx
    subq    %rcx, %rdi
    sbbq    %rax, %rsi
    movq    %rdi, -40(%rsp)
    sbbq    %rax, %rdx
    movq    %rsi, -32(%rsp)
    movdqa  -40(%rsp), %xmm0
    movq    %rdx, -24(%rsp)
    sbbq    %rax, %rdx
    movq    %r8, %rax
    movq    %rdx, -16(%rsp)
    movdqa  -24(%rsp), %xmm1
    movups  %xmm0, (%r8)
    movups  %xmm1, 16(%r8)
    ret

https://godbolt.org/z/nKPWx3
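For anyone who wants to reproduce this standalone on godbolt, the scaffolding below is enough to compile the operator- above. This is only a sketch under assumptions: the namespace layout, the field_number definition, and the sub_borrow wrapper over _subborrow_u64 are guesses for illustration, not fast_io's actual code.

#include <immintrin.h>
#include <cstddef>
#include <cstdint>

// Hypothetical scaffolding, NOT fast_io's real definitions: field_number is
// assumed to be a plain 4x64-bit limb array, and sub_borrow is assumed to be
// a thin wrapper over the x86-64 _subborrow_u64 intrinsic.
namespace fast_io
{

namespace intrinsics
{
// Computes out = x - y - borrow and returns the outgoing borrow.
inline bool sub_borrow(bool borrow, std::uint64_t x, std::uint64_t y,
                       std::uint64_t& out) noexcept
{
    unsigned long long r;
    bool const b{static_cast<bool>(_subborrow_u64(borrow, x, y, &r))};
    out = r;
    return b;
}
}

namespace curve25519
{
struct field_number
{
    using value_type = std::uint64_t;
    value_type limb[4];
    value_type& operator[](std::size_t i) noexcept { return limb[i]; }
    value_type const& operator[](std::size_t i) const noexcept { return limb[i]; }
};
}

}

With this scaffolding, defining the operator- above inside namespace fast_io::curve25519 yields the mangled name _ZN7fast_io10curve25519miERKNS0_12field_numberES3_ seen in the listing.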
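For comparison, the reduction tail with both fixes applied could look like the sketch below. This is a hypothetical hand-written sequence that reuses GCC's register allocation from the listing above; it is not output from any compiler.

    sbbq    %rcx, %rcx      # %rcx = -borrow, whatever %rcx held before
    andl    $38, %ecx       # v = borrow ? 38 : 0
    subq    %rcx, %rdi      # f[0] -= v
    sbbq    $0, %rsi        # f[1]: propagate the borrow with an immediate 0
    sbbq    $0, %rdx        # f[2]
    sbbq    $0, %r9         # f[3]

This drops the movl $0, %eax / movq %rax, %rcx pair entirely and no longer ties up %rax as a zero register.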