Issue |
128349
|
Summary |
[AArch64] Bad codegen for widen followed by vdupq_n_*
|
Labels |
backend:AArch64,
missed-optimization
|
Assignees |
|
Reporter |
nikic
|
From https://github.com/rust-lang/rust/issues/137407:
VectorCombine (+ InstCombine) performs the following transform, rewriting `@src` into `@tgt` (https://llvm.godbolt.org/z/veW5oG6Gx):
```llvm
define void @src(ptr %ptr, i16 %x) {
%ext = zext i16 %x to i32
%ins = insertelement <1 x i32> poison, i32 %ext, i64 0
%shuf = shufflevector <1 x i32> %ins, <1 x i32> poison, <4 x i32> zeroinitializer
%bc = bitcast <4 x i32> %shuf to <8 x i16>
%add = add <8 x i16> %bc, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
store <8 x i16> %add, ptr %ptr, align 16
ret void
}
define void @tgt(ptr %ptr, i16 %x) {
%1 = insertelement <2 x i16> <i16 poison, i16 0>, i16 %x, i64 0
%bc = shufflevector <2 x i16> %1, <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
%add = add <8 x i16> %bc, splat (i16 1)
store <8 x i16> %add, ptr %ptr, align 16
ret void
}
```
This results in the following AArch64 codegen (https://llvm.godbolt.org/z/Px83GGq7Y):
```
src: // @src
movi v0.8h, #1
and w8, w1, #0xffff
dup v1.4s, w8
add v0.8h, v1.8h, v0.8h
str q0, [x0]
ret
tgt: // @tgt
movi v0.2d, #0000000000000000
movi v1.8h, #1
mov v0.h[0], w1
mov v0.h[2], w1
mov v0.h[4], w1
mov v0.h[6], w1
add v0.8h, v0.8h, v1.8h
str q0, [x0]
ret
```
The single `dup` in `src` has been replaced in `tgt` by a zeroing `movi` followed by four element-wise `mov`s into the even halfword lanes.
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs