Issue 170039
Summary Inefficient AVX-512 codegen for a simple loop that counts zero bytes.
Labels
Assignees
Reporter the8472
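Reproduction on Compiler Explorer: https://rust.godbolt.org/z/TY7hT19oc

For the function below, the `znver5` output extracts each of the 32 mask bits with its own `kshiftrd`/`kmovd`/`and` sequence and sums them with scalar `add`s, whereas the `znver2` output needs only a byte compare, `vpmovmskb`, and a single `popcnt`.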

```rust
/// Counts how many of the 32 bytes in `bar` are equal to zero.
///
/// The count is produced as a `usize` by `Iterator::count` and then narrowed
/// to `u16` with `try_from(..).unwrap()`. Because the array has exactly 32
/// elements the count is at most 32, so the conversion always fits in `u16`.
// `inline(never)` keeps this as a standalone function so that its generated
// code can be inspected on its own.
#[inline(never)]
pub fn foo(bar: &[u8; 32]) -> u16 {
    // Keep only the elements that are zero, count them, then narrow to u16.
    u16::try_from(bar.iter().filter(|v| **v == 0).count()).unwrap()
}
```

<details><summary>znver5</summary>

```assembly
# Generated code for `foo` (znver5 listing): counts the zero bytes among the
# 32 bytes addressed by rdi and leaves the count in eax.
#
# Shape of the code:
#   1. Build a 32-bit mask in k0 with one bit per input byte (1 = byte is zero).
#   2. For every bit position n, shift the mask right by n into k1, copy it to
#      a general-purpose register and isolate the low bit with `and reg, 1`.
#   3. Add the 32 isolated bits together with scalar `add`s.
# The extraction and the additions are interleaved throughout the listing.
example[f5d875b554c12ca0]::foo:
        # Save rbp, r15, r14, r13, r12 and rbx; all six are used as scratch
        # below and restored by the matching pops before `ret`.
 push    rbp
        push    r15
        push    r14
        push r13
        push    r12
        push    rbx
        # Load all 32 input bytes into ymm0 (unaligned load).
        vmovdqu ymm0, ymmword ptr [rdi]
        # k0 bit i = 1 when (byte i AND byte i) == 0, i.e. when byte i is zero.
        vptestnmb       k0, ymm0, ymm0
        # From here on: bit 0 is taken directly from k0; bits 1..30 are taken
        # from k1 after `kshiftrd k1, k0, n`. Each extracted register still
        # holds the higher mask bits until its `and reg, 1`.
        kshiftrd k1, k0, 1
        kmovd   r9d, k0
        kmovd   ecx, k1
        kshiftrd k1, k0, 2
        and     r9d, 1
        kmovd   r13d, k1
 kshiftrd        k1, k0, 3
        and     ecx, 1
        kmovd   esi, k1
 kshiftrd        k1, k0, 4
        add     ecx, r9d
        and r13d, 1
        kmovd   ebx, k1
        kshiftrd        k1, k0, 5
 and     esi, 1
        kmovd   r15d, k1
        kshiftrd        k1, k0, 6
 and     ebx, 1
        add     esi, r13d
        kmovd   edx, k1
 kshiftrd        k1, k0, 7
        and     r15d, 1
        add     esi, ecx
        kmovd   ebp, k1
        kshiftrd        k1, k0, 8
        add r15d, ebx
        and     edx, 1
        kmovd   r12d, k1
 kshiftrd        k1, k0, 9
        add     edx, r15d
        and     ebp, 1
        kmovd   r14d, k1
        kshiftrd        k1, k0, 10
        add edx, esi
        and     r12d, 1
        kmovd   eax, k1
 kshiftrd        k1, k0, 11
        add     r12d, ebp
        and     r14d, 1
        kmovd   edi, k1
        kshiftrd        k1, k0, 12
        add r14d, r12d
        and     eax, 1
        kmovd   r11d, k1
 kshiftrd        k1, k0, 13
        # Spill the (not yet masked) bit-11 value; edi is reused for bit 14.
        # The slot is below rsp and rsp is never adjusted after the pushes --
        # presumably this relies on the System V red zone (leaf function).
        mov     dword ptr [rsp - 8], edi
 add     eax, r14d
        kmovd   r8d, k1
        kshiftrd        k1, k0, 14
        and     r11d, 1
        # eax now holds the sum of bits 0..10.
        add     eax, edx
        kmovd   edi, k1
        kshiftrd        k1, k0, 15
        and     r8d, 1
        kmovd r9d, k1
        kshiftrd        k1, k0, 16
        and     edi, 1
 kmovd   r10d, k1
        kshiftrd        k1, k0, 17
        and     r9d, 1
        # Spill the bit-16 value; r10d is reused for bit 17 on the next line.
        mov     dword ptr [rsp - 4], r10d
        kmovd   r10d, k1
 kshiftrd        k1, k0, 18
        kmovd   ebx, k1
        kshiftrd k1, k0, 19
        and     r10d, 1
        kmovd   r15d, k1
 kshiftrd        k1, k0, 20
        and     ebx, 1
        kmovd   r13d, k1
        kshiftrd        k1, k0, 21
        and     r15d, 1
 kmovd   esi, k1
        kshiftrd        k1, k0, 22
        and     r13d, 1
        kmovd   ecx, k1
        kshiftrd        k1, k0, 23
        and esi, 1
        # Spill the bit-22 value, then reload the bit-11 value into ecx.
        mov     dword ptr [rsp - 12], ecx
        mov     ecx, dword ptr [rsp - 8]
        kmovd   r14d, k1
        kshiftrd        k1, k0, 24
        kmovd   edx, k1
        kshiftrd        k1, k0, 25
 and     r14d, 1
        kmovd   ebp, k1
        kshiftrd        k1, k0, 26
        and     edx, 1
        and     ebp, 1
        and     ecx, 1
        # Accumulate bits 11..15 (chain ends in r9d), then fold in bits 0..10.
 add     r11d, ecx
        add     r8d, r11d
        kmovd   r11d, k1
        kshiftrd        k1, k0, 27
        add     edi, r8d
 kmovd   r8d, k1
        kshiftrd        k1, k0, 28
        and     r11d, 1
        add     r9d, edi
        kmovd   edi, k1
        kshiftrd k1, k0, 29
        and     r8d, 1
        # r9d now holds the sum of bits 0..15.
        add     r9d, eax
        # Reload the spilled bit-16 value.
        mov eax, dword ptr [rsp - 4]
        kmovd   r12d, k1
        kshiftrd k1, k0, 30
        # Last extraction: k0 itself is shifted, leaving only bit 31 in it.
        kshiftrd        k0, k0, 31
        and     edi, 1
 kmovd   ecx, k1
        and     r12d, 1
        and     ecx, 1
 add     ecx, r12d
        and     eax, 1
        add     r10d, eax
 kmovd   eax, k0
        add     ebx, r10d
        # Reload the spilled bit-22 value.
        mov     r10d, dword ptr [rsp - 12]
        and     eax, 1
        add     r15d, ebx
        # eax = bit 29 + bit 30 + bit 31.
        add eax, ecx
        add     r13d, r15d
        add     esi, r13d
        # esi = sum of bits 0..21.
        add esi, r9d
        and     r10d, 1
        add     r14d, r10d
 add     edx, r14d
        add     ebp, edx
        add     r11d, ebp
 add     r8d, r11d
        add     edi, r8d
        # edi = sum of bits 0..28.
        add     edi, esi
        # eax = sum of all 32 mask bits = number of zero bytes.
 add     eax, edi
        # Restore the registers saved on entry, in reverse order.
        pop     rbx
        pop     r12
        pop r13
        pop     r14
        pop     r15
        pop     rbp
        # Clear the upper halves of the vector registers before returning.
 vzeroupper
 ret
```

</details>


<details><summary>znver2</summary>

```assembly
# Generated code for `foo` (znver2 listing): counts the zero bytes among the
# 32 bytes addressed by rdi and leaves the count in eax, using a vector
# compare, a byte-mask extraction and one population count.
example[f5d875b554c12ca0]::foo:
        # Zero the comparison operand.
 vpxor   xmm0, xmm0, xmm0
        # Compare each of the 32 input bytes with zero; equal bytes become 0xFF.
        vpcmpeqb        ymm0, ymm0, ymmword ptr [rdi]
        # Gather the top bit of every byte into a 32-bit mask in eax.
        vpmovmskb       eax, ymm0
        # Number of set mask bits = number of zero bytes.
        popcnt  eax, eax
        # Clear the upper halves of the vector registers before returning.
 vzeroupper
        ret
```

</details>
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to