Issue 130179
Summary Inlining applies overly-broad function return range to inlined intrinsic
Labels new issue
Assignees
Reporter scottmcm
    Found while investigating https://github.com/rust-lang/rust/pull/133984

Given this IR:
```llvm
define noundef range(i8 -1, 3) i8 @rust_i16_partial_ord(i16 noundef %0, i16 noundef %1) unnamed_addr #0 {
  %7 = tail call noundef i8 @llvm.scmp.i8.i16(i16 %0, i16 %1)
  ret i8 %7
}

define noundef zeroext i1 @check_lt_direct_before_inlining(i16 noundef %0, i16 noundef %1, i16 noundef %2, i16 noundef %3) unnamed_addr #0 {
start:
  %_3.i4.i = tail call noundef i8 @rust_i16_partial_ord(i16 %0, i16 %2)
  switch i8 %_3.i4.i, label %bb4.i [
    i8 2, label %"_ZN4core5tuple65_$LT$impl$u20$core..cmp..PartialOrd$u20$for$u20$$LP$U$C$T$RP$$GT$2lt17h933a2b8cae739748E.exit"
    i8 0, label %bb5.i
  ]

bb5.i: ; preds = %start
  %_0.i.i = icmp ult i16 %1, %3
  br label %"_ZN4core5tuple65_$LT$impl$u20$core..cmp..PartialOrd$u20$for$u20$$LP$U$C$T$RP$$GT$2lt17h933a2b8cae739748E.exit"

bb4.i: ; preds = %start
  %4 = icmp slt i16 %0, %2
  br label %"_ZN4core5tuple65_$LT$impl$u20$core..cmp..PartialOrd$u20$for$u20$$LP$U$C$T$RP$$GT$2lt17h933a2b8cae739748E.exit"

"_ZN4core5tuple65_$LT$impl$u20$core..cmp..PartialOrd$u20$for$u20$$LP$U$C$T$RP$$GT$2lt17h933a2b8cae739748E.exit": ; preds = %start, %bb5.i, %bb4.i
  %_0.sroa.0.0.i = phi i1 [ %_0.i.i, %bb5.i ], [ %4, %bb4.i ], [ false, %start ]
  ret i1 %_0.sroa.0.0.i
}
```
Today (<https://llvm.godbolt.org/z/Wj8cPnK3n>) it optimizes to:
```llvm
define noundef zeroext i1 @check_lt_direct_before_inlining(i16 noundef %0, i16 noundef %1, i16 noundef %2, i16 noundef %3) unnamed_addr #0 {
start:
  %4 = tail call noundef range(i8 -1, 3) i8 @llvm.scmp.i8.i16(i16 %0, i16 %2)
  switch i8 %4, label %bb4.i [
    i8 2, label %"_ZN4core5tuple65_$LT$impl$u20$core..cmp..PartialOrd$u20$for$u20$$LP$U$C$T$RP$$GT$2lt17h933a2b8cae739748E.exit"
    i8 0, label %bb5.i
  ]

bb5.i: ; preds = %start
  %_0.i.i = icmp ult i16 %1, %3
  br label %"_ZN4core5tuple65_$LT$impl$u20$core..cmp..PartialOrd$u20$for$u20$$LP$U$C$T$RP$$GT$2lt17h933a2b8cae739748E.exit"

bb4.i: ; preds = %start
  %5 = icmp slt i16 %0, %2
  br label %"_ZN4core5tuple65_$LT$impl$u20$core..cmp..PartialOrd$u20$for$u20$$LP$U$C$T$RP$$GT$2lt17h933a2b8cae739748E.exit"

"_ZN4core5tuple65_$LT$impl$u20$core..cmp..PartialOrd$u20$for$u20$$LP$U$C$T$RP$$GT$2lt17h933a2b8cae739748E.exit": ; preds = %bb4.i, %bb5.i, %start
  %_0.sroa.0.0.i = phi i1 [ %_0.i.i, %bb5.i ], [ %5, %bb4.i ], [ false, %start ]
  ret i1 %_0.sroa.0.0.i
}
```

Note, in particular, the inlined intrinsic call at the beginning:
```llvm
start:
  %4 = tail call noundef range(i8 -1, 3) i8 @llvm.scmp.i8.i16(i16 %0, i16 %2)
```
That's an overly-broad range for `scmp`, which is always in [-1, 2).

As a result, follow-up passes don't optimize away that dead `i8 2` arm of the `switch` -- I guess they trust that range and don't look at what they know `scmp` *actually* returns.

So it would be good if either:
1) inlining was smarter about the output range it put on a known intrinsic like this, or
2) some pre-inlining optimization put the correct output range on `scmp`, as
```diff
 define noundef range(i8 -1, 3) i8 @rust_i16_partial_ord(i16 noundef %0, i16 noundef %1) unnamed_addr #0 {
-  %7 = tail call noundef i8 @llvm.scmp.i8.i16(i16 %0, i16 %1)
+  %7 = tail call noundef range(i8 -1, 2) i8 @llvm.scmp.i8.i16(i16 %0, i16 %1)
   ret i8 %7
 }
```
also fixes the problem (<https://llvm.godbolt.org/z/h7a55WKxK>).

_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to