Issue |
95994
|
Summary |
Failure to propagate knowledge from comparison into branches
|
Labels |
missed-optimization
|
Assignees |
|
Reporter |
Kmeakin
|
This Rust code produces sub-optimal AArch64 assembly:
https://godbolt.org/z/Ex7jKGzj7
```rust
/// Returns `true` when the first byte of `s` equals `b'a'`.
///
/// For an empty `s`, `bytes().next()` yields `None`, which is not equal to
/// `Some(b'a')`, so the result is `false`.
#[no_mangle]
pub fn starts_with_a(s: &str) -> bool {
s.bytes().next() == Some(b'a')
}
```
```asm
// Compiler output for `starts_with_a`. x0 and x1 appear to carry the pointer
// and length of `s` (cf. `%s.0` / `%s.1` in the IR of this report).
starts_with_a:
// Branch to .LBB0_2 when x1 == 0; fall through when x1 != 0.
cbz x1, .LBB0_2
// Load one byte from the address in x0 and test it against 97 (ASCII 'a').
ldrb w8, [x0]
cmp w8, #97
cset w8, eq
// Redundant: x1 != 0 is already known on this fall-through path.
cmp x1, #0
cset w9, ne
and w0, w9, w8
ret
.LBB0_2:
// Redundant: x1 == 0 is already known on this path.
cmp x1, #0
cset w9, ne
// w8 is not written on this path; w9 is 0 here, so the `and` yields 0.
and w0, w9, w8
ret
```
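The `cmp x1, #0` in each branch is redundant: its result is already determined by the preceding `cbz x1` test (`x1` is known to be non-zero on the fall-through path and zero at `.LBB0_2`).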
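The optimal assembly can be produced by removing the `select` in the final basic block (it appears as `and i1 %0, %4` in `@src` below) and having the phi take `0` instead of `undef` on the edge from `bb1`: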
https://godbolt.org/z/87MKvPjf6
https://alive2.llvm.org/ce/z/VofZ27
```llvm
; Source function for the Alive2 check: the phi in bb3 takes `undef` on the
; edge from bb1, and the result is then masked with %0 by a trailing `and`.
define i1 @src(ptr %s.0, i64 %s.1) {
bb1:
; %0 is true when %s.1 is non-zero; it selects between bb2 and bb3.
%0 = icmp ne i64 %s.1, 0
br i1 %0, label %bb2, label %bb3
bb2:
; Reached only when %0 is true: load one byte and compare it with 97.
%2 = load i8, ptr %s.0, align 1
%3 = icmp eq i8 %2, 97
br label %bb3
bb3:
; %0 is true on the edge from bb2 and false on the edge from bb1, so the
; `and` only serves to turn the `undef` incoming value into false.
%4 = phi i1 [ %3, %bb2 ], [ undef, %bb1 ]
%5 = and i1 %0, %4
ret i1 %5
}
; Target function for the Alive2 check: same control flow as @src, but the phi
; takes 0 directly on the edge from bb1 and the trailing `and` is gone.
define i1 @tgt(ptr %s.0, i64 %s.1) {
bb1:
; %0 is true when %s.1 is non-zero; it selects between bb2 and bb3.
%0 = icmp ne i64 %s.1, 0
br i1 %0, label %bb2, label %bb3
bb2:
; Reached only when %0 is true: load one byte and compare it with 97.
%2 = load i8, ptr %s.0, align 1
%3 = icmp eq i8 %2, 97
br label %bb3
bb3:
; Result is %3 when coming from bb2, and 0 when coming from bb1.
%4 = phi i1 [ %3, %bb2 ], [ 0, %bb1 ]
ret i1 %4
}
```
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs