Issue 128349
Summary [AArch64] Bad codegen for widen followed by vdupq_n_*
Labels backend:AArch64, missed-optimization
Assignees
Reporter nikic
    From https://github.com/rust-lang/rust/issues/137407:

VectorCombine (together with InstCombine) performs the following transform (https://llvm.godbolt.org/z/veW5oG6Gx):
```llvm
define void @src(ptr %ptr, i16 %x) {
  %ext = zext i16 %x to i32
  %ins = insertelement <1 x i32> poison, i32 %ext, i64 0
  %shuf = shufflevector <1 x i32> %ins, <1 x i32> poison, <4 x i32> zeroinitializer
  %bc = bitcast <4 x i32> %shuf to <8 x i16>
  %add = add <8 x i16> %bc, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  store <8 x i16> %add, ptr %ptr, align 16
  ret void
}

define void @tgt(ptr %ptr, i16 %x) {
  %1 = insertelement <2 x i16> <i16 poison, i16 0>, i16 %x, i64 0
  %bc = shufflevector <2 x i16> %1, <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %add = add <8 x i16> %bc, splat (i16 1)
  store <8 x i16> %add, ptr %ptr, align 16
  ret void
}
```
This results in the following codegen (https://llvm.godbolt.org/z/Px83GGq7Y):
```
src:                                    // @src
        movi    v0.8h, #1
        and     w8, w1, #0xffff
        dup     v1.4s, w8
        add     v0.8h, v1.8h, v0.8h
        str     q0, [x0]
        ret
tgt:                                    // @tgt
        movi    v0.2d, #0000000000000000
        movi    v1.8h, #1
        mov     v0.h[0], w1
        mov     v0.h[2], w1
        mov     v0.h[4], w1
        mov     v0.h[6], w1
        add     v0.8h, v0.8h, v1.8h
        str q0, [x0]
        ret
```
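The dup has been replaced by element-wise movs: where `src` uses a single `dup v1.4s, w8`, `tgt` zeroes the register with `movi` and then inserts `w1` into lanes 0, 2, 4 and 6 with four separate `mov v0.h[N], w1` instructions.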
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to