https://gcc.gnu.org/bugzilla/show_bug.cgi?id=91681

--- Comment #2 from Andrew Pinski <pinskia at gcc dot gnu.org> ---
GCC 11 is better (note that the widening multiply is still not detected):
        # x86-64, AT&T operand order (source, destination).
        # Entry register values are referred to below as:
        #   A = %rdi, B = %rsi, C = %rdx, D = %rcx, P = %r8 (base address of the stores).
        # Four unsigned 64x64->128 multiplies (mulq) are combined into two
        # two-word results written through P:
        #   P+0  / P+8  : hi64(C*B) + C*A   (low word / high word, carry propagated)
        #   P+16 / P+24 : hi64(B*D) + D*A   (low word / high word, carry propagated)
        # The low halves of C*B and B*D are computed by mulq but never used.
        # No stack memory is touched.
        movq    %rdx, %rax              # rax = C (mulq takes one factor implicitly from %rax)
        movq    %rdi, %r9               # r9 = A; %rdi is reused as scratch below
        movq    %rdx, %rdi              # rdi = C, kept because mulq overwrites %rdx
        mulq    %rsi                    # rdx:rax = C * B
        movq    %rdx, %rax              # rax = hi64(C*B); lo64(C*B) is dropped here
        xorl    %edx, %edx              # rdx = 0 (32-bit write clears the upper half)
        movq    %rax, %r10              # r10 = hi64(C*B)  -> low word of first sum
        movq    %rdi, %rax              # rax = C
        movq    %rdx, %r11              # r11 = 0          -> high word of first sum
        mulq    %r9                     # rdx:rax = C * A
        addq    %rax, %r10              # r10 += lo64(C*A); carry-out goes to CF
        movq    %rsi, %rax              # rax = B (mov leaves CF intact for the adcq below)
        adcq    %rdx, %r11              # r11 = hi64(C*A) + CF
        mulq    %rcx                    # rdx:rax = B * D
        movq    %rcx, %rax              # rax = D; lo64(B*D) is dropped here
        xorl    %edi, %edi              # rdi = 0          -> high word of second sum
        movq    %r10, (%r8)             # store first sum, low word
        movq    %r11, 8(%r8)            # store first sum, high word
        movq    %rdx, %rsi              # rsi = hi64(B*D)  -> low word of second sum
        mulq    %r9                     # rdx:rax = D * A
        addq    %rax, %rsi              # rsi += lo64(D*A); carry-out goes to CF
        adcq    %rdx, %rdi              # rdi = hi64(D*A) + CF
        movq    %rsi, 16(%r8)           # store second sum, low word
        movq    %rdi, 24(%r8)           # store second sum, high word
        ret

This is still two more instructions than LLVM, but at least it no longer uses the stack.

Reply via email to