Author: Zhang Xiang
Date: 2023-12-04T16:25:37+08:00
New Revision: 21fef821eb83d0d426a5e2de20469dcdd41598f1
URL: https://github.com/llvm/llvm-project/commit/21fef821eb83d0d426a5e2de20469dcdd41598f1
DIFF: https://github.com/llvm/llvm-project/commit/21fef821eb83d0d426a5e2de20469dcdd41598f1.diff

LOG: [SimplifyCFG] Pre-commit test for folding branches in simplify cfg

Added:
    clang/test/CodeGenCUDA/simplify-cfg-unroll.cu

Modified:

Removed:

################################################################################
diff --git a/clang/test/CodeGenCUDA/simplify-cfg-unroll.cu b/clang/test/CodeGenCUDA/simplify-cfg-unroll.cu
new file mode 100644
index 0000000000000..b6502ce76c298
--- /dev/null
+++ b/clang/test/CodeGenCUDA/simplify-cfg-unroll.cu
@@ -0,0 +1,86 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
+// REQUIRES: amdgpu-registered-target
+// REQUIRES: x86-registered-target
+// RUN: %clang_cc1 -O2 "-aux-triple" "x86_64-unknown-linux-gnu" "-triple" "amdgcn-amd-amdhsa" \
+// RUN: -fcuda-is-device "-aux-target-cpu" "x86-64" -emit-llvm -o - %s | FileCheck %s
+
+#include "Inputs/cuda.h"
+
+__device__ void bar();
+
+// CHECK-LABEL: define dso_local void @_Z4funciPPiiS_(
+// CHECK-SAME: i32 noundef [[IDX:%.*]], ptr nocapture noundef readonly [[ARR:%.*]], i32 noundef [[DIMS:%.*]], ptr nocapture noundef [[OUT:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[CMP127:%.*]] = icmp eq i32 [[DIMS]], 0
+// CHECK-NEXT: br i1 [[CMP127]], label [[CLEANUP:%.*]], label [[IF_END_PREHEADER:%.*]]
+// CHECK: if.end.preheader:
+// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[DIMS]], -1
+// CHECK-NEXT: [[UMIN:%.*]] = tail call i32 @llvm.umin.i32(i32 [[TMP0]], i32 15)
+// CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i32 [[UMIN]], 1
+// CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[TMP1]] to i64
+// CHECK-NEXT: [[ARRAYIDX13_1:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+// CHECK-NEXT: [[ARRAYIDX13_2:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+// CHECK-NEXT: [[ARRAYIDX13_3:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+// CHECK-NEXT: br label [[IF_END:%.*]]
+// CHECK: if.end:
+// CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[IF_END_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[IF_END]] ]
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 [[INDVARS_IV]]
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA3:![0-9]+]]
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !tbaa [[TBAA7:![0-9]+]]
+// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP4]], [[TMP3]]
+// CHECK-NEXT: store i32 [[ADD14]], ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT: tail call void @_Z3barv() #[[ATTR3:[0-9]+]]
+// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[ARRAYIDX11_1:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 1
+// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX11_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT: [[ADD14_1:%.*]] = add nsw i32 [[TMP7]], [[TMP6]]
+// CHECK-NEXT: store i32 [[ADD14_1]], ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT: tail call void @_Z3barv() #[[ATTR3]]
+// CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[ARRAYIDX11_2:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 2
+// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX11_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT: [[ADD14_2:%.*]] = add nsw i32 [[TMP10]], [[TMP9]]
+// CHECK-NEXT: store i32 [[ADD14_2]], ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT: tail call void @_Z3barv() #[[ATTR3]]
+// CHECK-NEXT: [[TMP11:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[ARRAYIDX11_3:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i64 3
+// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX11_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT: [[ADD14_3:%.*]] = add nsw i32 [[TMP13]], [[TMP12]]
+// CHECK-NEXT: store i32 [[ADD14_3]], ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT: tail call void @_Z3barv() #[[ATTR3]]
+// CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+// CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+// CHECK-NEXT: br i1 [[EXITCOND]], label [[CLEANUP]], label [[IF_END]], !llvm.loop [[LOOP9:![0-9]+]]
+// CHECK: cleanup:
+// CHECK-NEXT: ret void
+//
+__device__ void func(int Idx, int *Arr[], int Dims, int *Out) {
+  #pragma unroll
+  for (int Dim = 0; Dim < 16; ++Dim) {
+    if (Dim == Dims) {
+      break;
+    }
+    int divmod = Arr[Dim][Idx];
+    Idx = divmod + 1;
+
+    for (int arg = 0; arg < 4; arg++) {
+      Out[arg] += Arr[Dim][arg];
+      bar();
+    }
+  }
+}
+//.
+// CHECK: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0}
+// CHECK: [[META4]] = !{!"any pointer", [[META5:![0-9]+]], i64 0}
+// CHECK: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0}
+// CHECK: [[META6]] = !{!"Simple C++ TBAA"}
+// CHECK: [[TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0}
+// CHECK: [[META8]] = !{!"int", [[META5]], i64 0}
+// CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META10:![0-9]+]], [[META11:![0-9]+]]}
+// CHECK: [[META10]] = !{!"llvm.loop.mustprogress"}
+// CHECK: [[META11]] = !{!"llvm.loop.unroll.enable"}
+//.

_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits