Issue 108731
Summary [x86-64 BMI1] AND-NOT's should be preferred to OR-NOT's because we have `andn`/`vpandn` instructions
Labels new issue
Assignees
Reporter Validark
    Zig code:

```zig
export fn foo(w: u64, x: u64, y: u64, z: u64) u64 {
    var s = w;
    s &= ~(x & y);
    s &= ~(z & ~y);
    return s;
}

export fn fooVec(w: @Vector(16, u8), x: @Vector(16, u8), y: @Vector(16, u8), z: @Vector(16, u8)) @Vector(16, u8) {
    var s = w;
    s &= ~(x & y);
    s &= ~(z & ~y);
    return s;
}
```

Compiled for Znver3:

```asm
foo:
        not     rcx
        and     rsi, rdx
        andn    rax, rsi, rdi
        or      rcx, rdx
 and     rax, rcx
        ret

fooVec:
        vpand   xmm1, xmm2, xmm1
        vpcmpeqd        xmm4, xmm4, xmm4
        vpandn  xmm0, xmm1, xmm0
        vpxor   xmm1, xmm3, xmm4
        vpor    xmm1, xmm1, xmm2
        vpand   xmm0, xmm0, xmm1
        ret
```

I expected it to be `and`+`andn`+`andn`+`andn` for the scalar version, and `vpand`+`vpandn`+`vpandn`+`vpandn` for the vector version. However, it turns `s &= ~(z & ~y);` into `s &= ~z | y;`. While this makes sense on machines without `andn` (and doesn't matter for machines which have `andn` and `orn`, or just `vpternlogq`), with x86-64 BMI1 we only get `andn`. We never got an `orn`, so this optimization hurts us.

LLVM (optimized):

```llvm
define dso_local i64 @foo(i64 %0, i64 %1, i64 %2, i64 %3) local_unnamed_addr {
Entry:
  %4 = and i64 %2, %1
  %5 = xor i64 %4, -1
  %6 = and i64 %5, %0
  %.not = xor i64 %3, -1
  %7 = or i64 %.not, %2
  %8 = and i64 %6, %7
  ret i64 %8
}

declare void @llvm.dbg.value(metadata, metadata, metadata) #1

define dso_local <16 x i8> @fooVec(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2, <16 x i8> %3) local_unnamed_addr {
Entry:
  %4 = and <16 x i8> %2, %1
  %5 = xor <16 x i8> %4, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %6 = and <16 x i8> %5, %0
  %.not = xor <16 x i8> %3, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %7 = or <16 x i8> %.not, %2
  %8 = and <16 x i8> %6, %7
  ret <16 x i8> %8
}
```

[LLVM Godbolt link](https://llvm.godbolt.org/z/ETEdK3fGM)
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to