Issue |
125611
|
Summary |
[AArch64] boolean or+sext can be done with addhn
|
Labels |
new issue
|
Assignees |
|
Reporter |
dzaima
|
https://godbolt.org/z/n3edx6Ko3
This function:
```c
#include<stdbool.h>
#include<arm_neon.h>
/* Reproducer: reports whether any lane of x or y compares equal to 0.0
   (the IR further down shows the compares as `fcmp oeq ... zeroinitializer`). */
bool foo(float64x2_t x, float64x2_t y) {
/* Combine the two per-lane compare results with a single add-high-narrow.
   NOTE(review): this relies on each compare lane being all-ones or all-zeros,
   so the high 32 bits of the 64-bit sum are non-zero iff either input lane
   matched; the IR below shows LLVM folding this into `or` + `sext`. */
uint32x2_t any_zeroes = vaddhn_u64(
vceqzq_f64(x),
vceqzq_f64(y)
);
/* Reinterpret both 32-bit lanes as one 64-bit value so that a single scalar
   compare against zero tests the two lanes together. */
return vget_lane_f64((float64x1_t)any_zeroes, 0) != 0;
}
```
compiles as:
```asm
foo:
fcmeq v0.2d, v0.2d, #0.0
fcmeq v1.2d, v1.2d, #0.0
orr v0.16b, v0.16b, v1.16b
xtn v0.2s, v0.2d
fcmp d0, #0.0
cset w0, ne
ret
```
although keeping the `addhn` would be better, since it replaces the `orr` + `xtn` pair with a single instruction:
```asm
foo:
fcmeq v0.2d, v0.2d, 0
fcmeq v1.2d, v1.2d, 0
addhn v1.2s, v0.2d, v1.2d
fcmp d1, #0.0
cset w0, ne
ret
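```
The LLVM IR for the function, in which the add-high-narrow has already been folded into `or` + `sext`: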
```llvm
define dso_local i1 @foo(<2 x double> noundef %x, <2 x double> noundef %y) local_unnamed_addr {
entry:
%0 = fcmp oeq <2 x double> %x, zeroinitializer
%1 = fcmp oeq <2 x double> %y, zeroinitializer
%2 = or <2 x i1> %0, %1
%vaddhn2.i = sext <2 x i1> %2 to <2 x i32>
%3 = bitcast <2 x i32> %vaddhn2.i to <1 x double>
%vget_lane = extractelement <1 x double> %3, i64 0
%cmp = fcmp une double %vget_lane, 0.000000e+00
ret i1 %cmp
}
```
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs