Issue |
142323
|
Summary |
bad codegen for `sqsub` on `aarch64`
|
Labels |
new issue
|
Assignees |
|
Reporter |
folkertdev
|
There appears to be a codegen regression between 20.1.0 and trunk for the following code:
https://godbolt.org/z/9sa318eEn
```llvm
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
target triple = "aarch64-unknown-linux-gnu"
define void @foo(ptr dead_on_unwind noalias nocapture noundef writable writeonly sret([8 x i8]) align 8 dereferenceable(8) initializes((0, 8)) %_0, ptr noalias nocapture noundef readonly align 8 dereferenceable(8) %a, ptr noalias nocapture noundef readonly align 8 dereferenceable(8) %b) unnamed_addr {
start:
%0 = load <1 x i64>, ptr %a, align 8
%1 = load <1 x i64>, ptr %b, align 8
%2 = tail call <1 x i64> @llvm.aarch64.neon.sqsub.v1i64(<1 x i64> %0, <1 x i64> %1) #3
store <1 x i64> %2, ptr %_0, align 8
ret void
}
define void @bar(ptr dead_on_unwind noalias nocapture noundef writable writeonly sret([8 x i8]) align 8 dereferenceable(8) initializes((0, 8)) %_0, ptr noalias nocapture noundef readonly align 8 dereferenceable(8) %a, ptr noalias nocapture noundef readonly align 8 dereferenceable(8) %b) unnamed_addr {
start:
%0 = load <1 x i64>, ptr %a, align 8
%1 = load <1 x i64>, ptr %b, align 8
%2 = tail call <1 x i64> @llvm.ssub.sat.v1i64(<1 x i64> %0, <1 x i64> %1)
store <1 x i64> %2, ptr %_0, align 8
ret void
}
declare <1 x i64> @llvm.aarch64.neon.sqsub.v1i64(<1 x i64>, <1 x i64>) unnamed_addr #1
declare <1 x i64> @llvm.ssub.sat.v1i64(<1 x i64>, <1 x i64>) #2
```
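On trunk, `foo` (the NEON intrinsic) is no longer selected as a single `sqsub`; it is expanded to the same scalar `subs`/`asr`/`eor`/`csel` sequence as `bar`: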
```asm
foo: // @foo
ldr d0, [x0]
ldr d1, [x1]
fmov x9, d1
fmov x10, d0
subs x9, x10, x9
asr x10, x9, #63
eor x10, x10, #0x8000000000000000
csel x9, x10, x9, vs
fmov d0, x9
str d0, [x8]
ret
bar: // @bar
ldr x9, [x1]
ldr x10, [x0]
subs x9, x10, x9
asr x10, x9, #63
eor x10, x10, #0x8000000000000000
csel x9, x10, x9, vs
fmov d0, x9
str d0, [x8]
ret
```
On 20.1.0, `foo` is selected as a single `sqsub`, while `bar` uses the scalar expansion:
```asm
foo: // @foo
ldr d0, [x0]
ldr d1, [x1]
sqsub d0, d0, d1
str d0, [x8]
ret
bar: // @bar
ldr x9, [x1]
ldr x10, [x0]
subs x9, x10, x9
asr x10, x9, #63
eor x10, x10, #0x8000000000000000
csel x9, x10, x9, vs
fmov d0, x9
str d0, [x8]
ret
```
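The target-independent intrinsic (`llvm.ssub.sat`) should indeed produce the same code as the NEON intrinsic (`llvm.aarch64.neon.sqsub`), but by having both select `sqsub`, not by having the NEON intrinsic regress to the longer scalar expansion!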
cc https://github.com/llvm/llvm-project/issues/94463 https://github.com/llvm/llvm-project/pull/140454 @davemgreen
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs