https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98856

--- Comment #23 from Richard Biener <rguenth at gcc dot gnu.org> ---
Created attachment 50300
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=50300&action=edit
preprocessed source of the important Botan TU

This is the full preprocessed source of the TU.  When compiled with -Ofast
-march=znver2, look for poly_double_n_le in the assembly.  In its prologue the
function branches on the kernel size; size 16 is the important one:

        cmpq    $16, %rdx
        je      .L54
...
.L54:
        .cfi_restore_state
        vmovdqu (%rsi), %xmm4
        vmovdqa %xmm4, 16(%rsp)
        movq    24(%rsp), %rdx
        vmovdqa 16(%rsp), %xmm5
        shrq    $63, %rdx
        imulq   $135, %rdx, %rcx
        movq    16(%rsp), %rdx
        vmovq   %rcx, %xmm0
        vpsllq  $1, %xmm5, %xmm1
        shrq    $63, %rdx
        vpinsrq $1, %rdx, %xmm0, %xmm0
        vpxor   %xmm1, %xmm0, %xmm0
        vmovdqu %xmm0, (%rdi)
        leaq    -16(%rbp), %rsp
        popq    %r12
        popq    %r13
        popq    %rbp
        .cfi_remember_state
        .cfi_def_cfa 7, 8
        ret

Reply via email to