Issue |
137168
|
Summary |
Missed GEP Optimization for Constant Index
|
Labels |
new issue
|
Assignees |
|
Reporter |
GINN-Imp
|
The following reduced IR is derived from https://github.com/dtcxzyw/llvm-opt-benchmark/blob/314b5d859bb1d19cb93c259e57edafaf11d4fc80/bench/abseil-cpp/original/bounded_utf8_length_sequence_test.ll#L8889
The reduced code is still a bit long, but we've pruned out extraneous code as much as possible. We may be able to reduce it further once some of the root causes are known.
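Missed optimization: `%66 = getelementptr inbounds nuw [2 x i64], ptr %0, i64 0, i64 %65` -> `%66 = getelementptr inbounds nuw [2 x i64], ptr %0, i64 0, i64 0`
Reasoning: `%1` is clamped to at most 63 before the `udiv` by 32, so the quotient is either 0 or 1; the GEP is only reached when the quotient is greater than 0 (i.e. exactly 1), so `%64 = sub i32 %63, 1` and therefore `%65` are always 0.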
https://godbolt.org/z/G3xf9rh8W
reduced code:
```llvm
; Reduced reproducer. Returns 1, or an i64 loaded from the [2 x i64] array at %0.
; The GEP index %65 is not a literal constant here, but the control flow below
; only ever lets it take the value 0.
; NOTE(review): the "; preds" annotations and value numbers are leftovers from
; the unreduced IR and do not all refer to blocks present in this snippet.
define i64 @src(ptr noundef nonnull align 8 dereferenceable(16) %0, i32 noundef %1) {
%5 = alloca i32, align 4
%16 = alloca i32, align 4
store i32 %1, ptr %5, align 4
%21 = load i32, ptr %5, align 4
; Clamp: if %1 >= 64, the slot %5 is overwritten with 63 in block 23.
%22 = icmp uge i32 %21, 64
br i1 %22, label %23, label %24
23: ; preds = %3
store i32 63, ptr %5, align 4
br label %24
24: ; preds = %23, %3
br label %31
31: ; preds = %30, %27
; %32 is at most 63 after the clamp above, so %33 = %32 / 32 is 0 or 1.
%32 = load i32, ptr %5, align 4
%33 = udiv i32 %32, 32
store i32 %33, ptr %16, align 4
br label %57
57: ; preds = %31
%58 = load i32, ptr %16, align 4
; Only a nonzero quotient (i.e. exactly 1) proceeds to block 61.
%59 = icmp ugt i32 %58, 0
br i1 %59, label %61, label %60
60: ; preds = %57
ret i64 1
61: ; preds = %57
; Reached only when the stored quotient is 1, so %64 = 1 - 1 = 0 and %65 = 0.
%63 = load i32, ptr %16, align 4
%64 = sub i32 %63, 1
%65 = zext i32 %64 to i64
; This is the GEP the report wants folded to a constant index of 0.
%66 = getelementptr inbounds nuw [2 x i64], ptr %0, i64 0, i64 %65
%67 = load i64, ptr %66, align 8
ret i64 %67
}
```
clang-trunk:
```llvm
; Output currently produced by clang trunk for @src: the allocas are gone, but
; the GEP in block %3 still uses a computed (non-constant) index.
define i64 @src(ptr noundef nonnull readonly align 8 captures(none) dereferenceable(16) %0, i32 noundef %1) local_unnamed_addr #0 {
; %1 < 32 returns the constant 1 through common.ret.
%.not = icmp ult i32 %1, 32
br i1 %.not, label %common.ret, label %3
common.ret: ; preds = %2, %3
%common.ret.op = phi i64 [ %8, %3 ], [ 1, %2 ]
ret i64 %common.ret.op
3: ; preds = %2
; Here %1 >= 32, so umin(%1, 63) lies in [32, 63], the shift by 5 gives 1,
; and %5 = 1 + (-1) = 0; the index %6 is nevertheless left as a variable.
%spec.store.select = tail call i32 @llvm.umin.i32(i32 %1, i32 63)
%4 = lshr i32 %spec.store.select, 5
%5 = add nsw i32 %4, -1
%6 = zext nneg i32 %5 to i64
%7 = getelementptr inbounds nuw [2 x i64], ptr %0, i64 0, i64 %6
%8 = load i64, ptr %7, align 8
br label %common.ret
}
```
expected code:
```llvm
; Expected result once the GEP index is known to be 0: a single block that
; loads directly from %0 and selects between that value and the constant 1.
define i64 @src_optimized(ptr noundef nonnull readonly align 8 captures(none) dereferenceable(16) %0, i32 noundef %1) local_unnamed_addr #0 {
common.ret:
%.not = icmp ult i32 %1, 32
; Load of element 0 performed unconditionally, straight from the base pointer.
%2 = load i64, ptr %0, align 8
; %1 < 32 yields 1; otherwise the loaded element is returned.
%spec.select = select i1 %.not, i64 1, i64 %2
ret i64 %spec.select
}
```
alive2 timed out. However, `opt -O3` produces the same IR for `@src_optimized` and `@tgt`, which suggests that the code before and after the desired optimization is equivalent. https://godbolt.org/z/G3xf9rh8W
(`@src_optimized` is obtained after `@src` is optimized by clang;
`@tgt` is obtained after `%66 = getelementptr inbounds nuw [2 x i64], ptr %0, i64 0, i64 %65` -> `%66 = getelementptr inbounds nuw [2 x i64], ptr %0, i64 0, i64 0`)
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs