Issue |
130179
|
Summary |
Inlining applies overly-broad function return range to inlined intrinsic
|
Labels |
new issue
|
Assignees |
|
Reporter |
scottmcm
|
Found while investigating https://github.com/rust-lang/rust/pull/133984
Given this IR:
```llvm
define noundef range(i8 -1, 3) i8 @rust_i16_partial_ord(i16 noundef %0, i16 noundef %1) unnamed_addr #0 {
%7 = tail call noundef i8 @llvm.scmp.i8.i16(i16 %0, i16 %1)
ret i8 %7
}
define noundef zeroext i1 @check_lt_direct_before_inlining(i16 noundef %0, i16 noundef %1, i16 noundef %2, i16 noundef %3) unnamed_addr #0 {
start:
%_3.i4.i = tail call noundef i8 @rust_i16_partial_ord(i16 %0, i16 %2)
switch i8 %_3.i4.i, label %bb4.i [
i8 2, label %"_ZN4core5tuple65_$LT$impl$u20$core..cmp..PartialOrd$u20$for$u20$$LP$U$C$T$RP$$GT$2lt17h933a2b8cae739748E.exit"
i8 0, label %bb5.i
]
bb5.i: ; preds = %start
%_0.i.i = icmp ult i16 %1, %3
br label %"_ZN4core5tuple65_$LT$impl$u20$core..cmp..PartialOrd$u20$for$u20$$LP$U$C$T$RP$$GT$2lt17h933a2b8cae739748E.exit"
bb4.i: ; preds = %start
%4 = icmp slt i16 %0, %2
br label %"_ZN4core5tuple65_$LT$impl$u20$core..cmp..PartialOrd$u20$for$u20$$LP$U$C$T$RP$$GT$2lt17h933a2b8cae739748E.exit"
"_ZN4core5tuple65_$LT$impl$u20$core..cmp..PartialOrd$u20$for$u20$$LP$U$C$T$RP$$GT$2lt17h933a2b8cae739748E.exit": ; preds = %start, %bb5.i, %bb4.i
%_0.sroa.0.0.i = phi i1 [ %_0.i.i, %bb5.i ], [ %4, %bb4.i ], [ false, %start ]
ret i1 %_0.sroa.0.0.i
}
```
Today <https://llvm.godbolt.org/z/Wj8cPnK3n> it optimizes to
```llvm
define noundef zeroext i1 @check_lt_direct_before_inlining(i16 noundef %0, i16 noundef %1, i16 noundef %2, i16 noundef %3) unnamed_addr #0 {
start:
%4 = tail call noundef range(i8 -1, 3) i8 @llvm.scmp.i8.i16(i16 %0, i16 %2)
switch i8 %4, label %bb4.i [
i8 2, label %"_ZN4core5tuple65_$LT$impl$u20$core..cmp..PartialOrd$u20$for$u20$$LP$U$C$T$RP$$GT$2lt17h933a2b8cae739748E.exit"
i8 0, label %bb5.i
]
bb5.i: ; preds = %start
%_0.i.i = icmp ult i16 %1, %3
br label %"_ZN4core5tuple65_$LT$impl$u20$core..cmp..PartialOrd$u20$for$u20$$LP$U$C$T$RP$$GT$2lt17h933a2b8cae739748E.exit"
bb4.i: ; preds = %start
%5 = icmp slt i16 %0, %2
br label %"_ZN4core5tuple65_$LT$impl$u20$core..cmp..PartialOrd$u20$for$u20$$LP$U$C$T$RP$$GT$2lt17h933a2b8cae739748E.exit"
"_ZN4core5tuple65_$LT$impl$u20$core..cmp..PartialOrd$u20$for$u20$$LP$U$C$T$RP$$GT$2lt17h933a2b8cae739748E.exit": ; preds = %bb4.i, %bb5.i, %start
%_0.sroa.0.0.i = phi i1 [ %_0.i.i, %bb5.i ], [ %5, %bb4.i ], [ false, %start ]
ret i1 %_0.sroa.0.0.i
}
```
Note, in particular, the inlined intrinsic call at the beginning:
```llvm
start:
%4 = tail call noundef range(i8 -1, 3) i8 @llvm.scmp.i8.i16(i16 %0, i16 %2)
```
That's an overly-broad range for `scmp`, which only ever returns -1, 0, or 1, i.e. its result is always in [-1, 2).
As a result, follow-up passes don't optimize away that dead `i8 2` arm of the `switch` -- I guess they trust that range and don't look at what they know `scmp` *actually* returns.
So it would be good if either:
1) inlining were smarter about the output range it puts on a known intrinsic like this, or
2) some pre-inlining optimization put the correct output range on `scmp`, as
```diff
define noundef range(i8 -1, 3) i8 @rust_i16_partial_ord(i16 noundef %0, i16 noundef %1) unnamed_addr #0 {
- %7 = tail call noundef i8 @llvm.scmp.i8.i16(i16 %0, i16 %1)
+ %7 = tail call noundef range(i8 -1, 2) i8 @llvm.scmp.i8.i16(i16 %0, i16 %1)
ret i8 %7
}
```
also fixes the problem (<https://llvm.godbolt.org/z/h7a55WKxK>).
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs