Issue 142323
Summary bad codegen for `sqsub` on `aarch64`
Labels new issue
Assignees
Reporter folkertdev
    There appears to be a codegen regression from 20.1.0 to trunk for the following code:

https://godbolt.org/z/9sa318eEn

```llvm
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
target triple = "aarch64-unknown-linux-gnu"

define void @foo(ptr dead_on_unwind noalias nocapture noundef writable writeonly sret([8 x i8]) align 8 dereferenceable(8) initializes((0, 8)) %_0, ptr noalias nocapture noundef readonly align 8 dereferenceable(8) %a, ptr noalias nocapture noundef readonly align 8 dereferenceable(8) %b) unnamed_addr {
start:
  %0 = load <1 x i64>, ptr %a, align 8
  %1 = load <1 x i64>, ptr %b, align 8
  %2 = tail call <1 x i64> @llvm.aarch64.neon.sqsub.v1i64(<1 x i64> %0, <1 x i64> %1) #3
  store <1 x i64> %2, ptr %_0, align 8
  ret void
}

define void @bar(ptr dead_on_unwind noalias nocapture noundef writable writeonly sret([8 x i8]) align 8 dereferenceable(8) initializes((0, 8)) %_0, ptr noalias nocapture noundef readonly align 8 dereferenceable(8) %a, ptr noalias nocapture noundef readonly align 8 dereferenceable(8) %b) unnamed_addr {
start:
  %0 = load <1 x i64>, ptr %a, align 8
  %1 = load <1 x i64>, ptr %b, align 8
  %2 = tail call <1 x i64> @llvm.ssub.sat.v1i64(<1 x i64> %0, <1 x i64> %1)
  store <1 x i64> %2, ptr %_0, align 8
  ret void
}

declare <1 x i64> @llvm.aarch64.neon.sqsub.v1i64(<1 x i64>, <1 x i64>) unnamed_addr #1

declare <1 x i64> @llvm.ssub.sat.v1i64(<1 x i64>, <1 x i64>) #2
```

on trunk

```asm
foo:                                    // @foo
        ldr     d0, [x0]
        ldr     d1, [x1]
        fmov    x9, d1
        fmov    x10, d0
        subs    x9, x10, x9
        asr x10, x9, #63
        eor     x10, x10, #0x8000000000000000
        csel x9, x10, x9, vs
        fmov    d0, x9
        str     d0, [x8]
 ret
bar:                                    // @bar
        ldr     x9, [x1]
        ldr     x10, [x0]
        subs    x9, x10, x9
        asr x10, x9, #63
        eor     x10, x10, #0x8000000000000000
        csel x9, x10, x9, vs
        fmov    d0, x9
        str     d0, [x8]
 ret
```

on 20.1.0

```asm
foo:                                    // @foo
        ldr     d0, [x0]
        ldr     d1, [x1]
        sqsub   d0, d0, d1
        str     d0, [x8]
        ret
bar: // @bar
        ldr     x9, [x1]
        ldr     x10, [x0]
 subs    x9, x10, x9
        asr     x10, x9, #63
        eor     x10, x10, #0x8000000000000000
        csel    x9, x10, x9, vs
        fmov    d0, x9
        str     d0, [x8]
        ret
``` 

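The target-independent version (`@bar`, using `llvm.ssub.sat`) should indeed generate the same code as the NEON version (`@foo`, using `llvm.aarch64.neon.sqsub`), but by having both emit a single `sqsub` instruction, not by having the NEON intrinsic regress to the scalar `subs`/`asr`/`eor`/`csel` expansion!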

cc https://github.com/llvm/llvm-project/issues/94463 https://github.com/llvm/llvm-project/pull/140454 @davemgreen 
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to