Issue 125611
Summary [AArch64] boolean or+sext can be done with addhn
Labels new issue
Assignees
Reporter dzaima
    https://godbolt.org/z/n3edx6Ko3

This function:
```c
#include<stdbool.h>
#include<arm_neon.h>
/*
 * Reproducer: intended to report whether any lane of x or y compares equal
 * to zero.
 *
 * Each vceqzq_f64 lane is a 64-bit mask that is either all-ones or
 * all-zeros, so the high 32 bits of the lane-wise sum taken by vaddhn_u64
 * are all-ones exactly when at least one of the two input masks is set.
 */
bool foo(float64x2_t x, float64x2_t y) {
  uint32x2_t any_zeroes = vaddhn_u64(
    vceqzq_f64(x),
    vceqzq_f64(y)
  );
  /* View both 32-bit lanes as one 64-bit value so a single scalar compare
     covers them together. */
  return vget_lane_f64((float64x1_t)any_zeroes, 0) != 0;
}
```
is compiled by LLVM with the `vaddhn_u64` turned into `orr` + `xtn`:
```asm
foo:
        fcmeq   v0.2d, v0.2d, #0.0      // per-lane mask: x == 0.0
        fcmeq   v1.2d, v1.2d, #0.0      // per-lane mask: y == 0.0
        orr     v0.16b, v0.16b, v1.16b  // combine the two masks
        xtn     v0.2s, v0.2d            // narrow each 64-bit lane to 32 bits
        fcmp    d0, #0.0                // compare the packed lanes, viewed as a double, with 0.0
        cset    w0, ne                  // w0 = 1 when the compare result is "not equal"
        ret
```
although keeping the single `addhn` would be better (one instruction fewer):
```asm
foo:
        fcmeq   v0.2d, v0.2d, 0         // per-lane mask: x == 0.0
        fcmeq   v1.2d, v1.2d, 0         // per-lane mask: y == 0.0
        addhn   v1.2s, v0.2d, v1.2d     // add the masks, keep the high 32 bits of each lane
        fcmp    d1, #0.0                // compare the packed lanes, viewed as a double, with 0.0
        cset    w0, ne                  // w0 = 1 when the compare result is "not equal"
        ret
```

```llvm
; IR for the reproducer: the vaddhn_u64 of the two comparison masks shows up
; as an `or` of the <2 x i1> compare results followed by a `sext` to <2 x i32>.
define dso_local i1 @foo(<2 x double> noundef %x, <2 x double> noundef %y) local_unnamed_addr {
entry:
  ; Lane-wise "== 0.0" tests on both inputs.
  %0 = fcmp oeq <2 x double> %x, zeroinitializer
  %1 = fcmp oeq <2 x double> %y, zeroinitializer
  ; The or + sext pair named in the issue summary.
  %2 = or <2 x i1> %0, %1
  %vaddhn2.i = sext <2 x i1> %2 to <2 x i32>
  ; Reinterpret the two i32 lanes as one double and test it against 0.0.
  %3 = bitcast <2 x i32> %vaddhn2.i to <1 x double>
  %vget_lane = extractelement <1 x double> %3, i64 0
  %cmp = fcmp une double %vget_lane, 0.000000e+00
  ret i1 %cmp
}
```
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to