Issue 137168
Summary Missed GEP Optimization for Constant Index
Labels new issue
Assignees
Reporter GINN-Imp
    The following reduced IR is derived from https://github.com/dtcxzyw/llvm-opt-benchmark/blob/314b5d859bb1d19cb93c259e57edafaf11d4fc80/bench/abseil-cpp/original/bounded_utf8_length_sequence_test.ll#L8889
The reduced code is still a bit long, but we've pruned out extraneous code as much as possible. We may be able to reduce it further once some of the root causes are known.
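Missed optimization: `%66 = getelementptr inbounds nuw [2 x i64], ptr %0, i64 0, i64 %65` -> `%66 = getelementptr inbounds nuw [2 x i64], ptr %0, i64 0, i64 0`
The index `%65` is always 0 on this path: the input is clamped to at most 63, so the `udiv` by 32 yields 0 or 1; the branch into the final block requires the quotient to be greater than 0 (i.e. exactly 1), and subtracting 1 leaves 0.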
https://godbolt.org/z/G3xf9rh8W

reduced code:
```llvm
; Reduced reproducer for the missed fold.
;   %0 - pointer, declared dereferenceable(16) / align 8, indexed below as [2 x i64]
;   %1 - i32 input, compared and divided as an unsigned value
; Returns 1 when the clamped input divided by 32 is 0; otherwise returns an i64
; loaded from %0 at a run-time-computed index.
; NOTE(review): several `preds` annotations name blocks (%3, %27, %30) that are not
; defined in this reduced function; they appear to be leftovers from the unreduced IR.
define i64 @src(ptr noundef nonnull align 8 dereferenceable(16) %0, i32 noundef %1) {
  ; %5 holds a mutable copy of %1; %16 later holds the quotient.
  %5 = alloca i32, align 4
  %16 = alloca i32, align 4
  store i32 %1, ptr %5, align 4
  %21 = load i32, ptr %5, align 4
  ; Unsigned check: inputs >= 64 take the clamping block.
  %22 = icmp uge i32 %21, 64
  br i1 %22, label %23, label %24

23:                                               ; preds = %3
 ; Clamp: overwrite the copy with 63, so the slot never holds more than 63 past here.
 store i32 63, ptr %5, align 4
  br label %24

24: ; preds = %23, %3
  br label %31

31: ; preds = %30, %27
  ; Quotient of a value in [0, 63] by 32, so %33 is either 0 or 1.
  %32 = load i32, ptr %5, align 4
  %33 = udiv i32 %32, 32
  store i32 %33, ptr %16, align 4
  br label %57

57:                                               ; preds = %31
  ; Go to the load path only when the quotient is non-zero.
  %58 = load i32, ptr %16, align 4
  %59 = icmp ugt i32 %58, 0
  br i1 %59, label %61, label %60

60: ; preds = %57
  ; Quotient was 0: return the constant 1.
  ret i64 1

61: ; preds = %57
  ; The quotient is reloaded from %16; it is > 0 here and at most 1, so it is 1,
  ; which makes %64 and %65 equal to 0.
  %63 = load i32, ptr %16, align 4
  %64 = sub i32 %63, 1
  %65 = zext i32 %64 to i64
  ; This is the GEP from the report: its last index could be the constant 0.
  %66 = getelementptr inbounds nuw [2 x i64], ptr %0, i64 0, i64 %65
  %67 = load i64, ptr %66, align 8
  ret i64 %67
}
```

clang-trunk:
```llvm
; Output currently produced by clang trunk for the reproducer.
; The allocas are gone and the clamp has become llvm.umin, but the element index
; into %0 is still computed at run time instead of being folded to 0.
define i64 @src(ptr noundef nonnull readonly align 8 captures(none) dereferenceable(16) %0, i32 noundef %1) local_unnamed_addr #0 {
  ; Inputs below 32 skip the load and return 1 through the phi in common.ret.
  %.not = icmp ult i32 %1, 32
  br i1 %.not, label %common.ret, label %3

common.ret:                                       ; preds = %2, %3
  ; Merges the constant 1 (from the entry block) with the loaded value (from block %3).
  %common.ret.op = phi i64 [ %8, %3 ], [ 1, %2 ]
  ret i64 %common.ret.op

3:                                                ; preds = %2
  ; Only reached when %1 >= 32, so umin(%1, 63) lies in [32, 63]; shifting right by 5
  ; gives 1 and adding -1 gives 0. The index %6 is therefore always 0 on this path.
  %spec.store.select = tail call i32 @llvm.umin.i32(i32 %1, i32 63)
  %4 = lshr i32 %spec.store.select, 5
  %5 = add nsw i32 %4, -1
  %6 = zext nneg i32 %5 to i64
  ; GEP still uses the variable index %6 (the missed optimization).
  %7 = getelementptr inbounds nuw [2 x i64], ptr %0, i64 0, i64 %6
  %8 = load i64, ptr %7, align 8
  br label %common.ret
}
```

expected code:
```llvm
; Output the reporter expects: a single block with no branch and no index arithmetic.
; Returns 1 when %1 < 32 (unsigned), otherwise the i64 stored at the start of %0.
define i64 @src_optimized(ptr noundef nonnull readonly align 8 captures(none) dereferenceable(16) %0, i32 noundef %1) local_unnamed_addr #0 {
common.ret:
  %.not = icmp ult i32 %1, 32
  ; The load reads directly from %0 (element 0) and runs unconditionally, before the
  ; select; %0 is declared dereferenceable(16) and align 8 in the signature.
  %2 = load i64, ptr %0, align 8
  ; Choose between the constant 1 and the loaded value without branching.
  %spec.select = select i1 %.not, i64 1, i64 %2
  ret i64 %spec.select
}
```

Alive2 timed out, but `opt -O3` produces the same IR for `@src_optimized` and `@tgt`, which suggests (though it does not formally prove) that the code before and after the desired optimization is equivalent: https://godbolt.org/z/G3xf9rh8W
(`@src_optimized` is obtained after `@src` is optimized by clang;
`@tgt` is obtained after rewriting `%66 = getelementptr inbounds nuw [2 x i64], ptr %0, i64 0, i64 %65` -> `%66 = getelementptr inbounds nuw [2 x i64], ptr %0, i64 0, i64 0`.)
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to