Author: Michael Kruse Date: 2022-05-20T15:19:52-05:00 New Revision: acec07005e038ab2891f235ae60ba2f0236bb952
URL: https://github.com/llvm/llvm-project/commit/acec07005e038ab2891f235ae60ba2f0236bb952 DIFF: https://github.com/llvm/llvm-project/commit/acec07005e038ab2891f235ae60ba2f0236bb952.diff LOG: [OpenMP] Fix partial unrolling off-by-one. Even though the comment description is ".unroll_inner.iv < NumIterations", the code emitted a BO_LE ('<=') operator for the inner loop that is to be unrolled. This led to one additional copy of the body code in a partially unrolled loop. It only manifests when the unrolled loop is consumed by another loop-associated construct. Fix by using the BO_LT operator instead. The condition for the outer loop and the corresponding code for tiling correctly used BO_LT already. Fixes #55236 Added: Modified: clang/lib/Sema/SemaOpenMP.cpp clang/test/OpenMP/irbuilder_unroll_partial_factor_for_collapse.c clang/test/OpenMP/irbuilder_unroll_partial_heuristic_for_collapse.c clang/test/OpenMP/unroll_codegen_for_collapse_outer.cpp clang/test/OpenMP/unroll_codegen_for_partial.cpp clang/test/OpenMP/unroll_codegen_parallel_for_factor.cpp clang/test/OpenMP/unroll_codegen_tile_for.cpp clang/test/OpenMP/unroll_codegen_unroll_for.cpp clang/test/OpenMP/unroll_codegen_unroll_for_attr.cpp Removed: ################################################################################ diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index e4305ba8c1718..42c3dbc181911 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -14483,11 +14483,11 @@ StmtResult Sema::ActOnOpenMPUnrollDirective(ArrayRef<OMPClause *> Clauses, if (!EndOfTile.isUsable()) return StmtError(); ExprResult InnerCond1 = BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(), - BO_LE, MakeInnerRef(), EndOfTile.get()); + BO_LT, MakeInnerRef(), EndOfTile.get()); if (!InnerCond1.isUsable()) return StmtError(); ExprResult InnerCond2 = - BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(), BO_LE, MakeInnerRef(), + BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(), 
BO_LT, MakeInnerRef(), MakeNumIterations()); if (!InnerCond2.isUsable()) return StmtError(); diff --git a/clang/test/OpenMP/irbuilder_unroll_partial_factor_for_collapse.c b/clang/test/OpenMP/irbuilder_unroll_partial_factor_for_collapse.c index 0bfed911077bf..94d7a14a0bb87 100644 --- a/clang/test/OpenMP/irbuilder_unroll_partial_factor_for_collapse.c +++ b/clang/test/OpenMP/irbuilder_unroll_partial_factor_for_collapse.c @@ -106,12 +106,12 @@ // CHECK-NEXT: %[[TMP15:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_J]], align 4 // CHECK-NEXT: %[[TMP16:.+]] = load i32, i32* %[[DOTUNROLLED_IV_J7]], align 4 // CHECK-NEXT: %[[ADD21:.+]] = add nsw i32 %[[TMP16]], 4 -// CHECK-NEXT: %[[CMP22:.+]] = icmp sle i32 %[[TMP15]], %[[ADD21]] +// CHECK-NEXT: %[[CMP22:.+]] = icmp slt i32 %[[TMP15]], %[[ADD21]] // CHECK-NEXT: br i1 %[[CMP22]], label %[[LAND_RHS:.+]], label %[[LAND_END:.+]] // CHECK-EMPTY: // CHECK-NEXT: [[LAND_RHS]]: // CHECK-NEXT: %[[TMP17:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_J]], align 4 -// CHECK-NEXT: %[[CMP24:.+]] = icmp sle i32 %[[TMP17]], 8 +// CHECK-NEXT: %[[CMP24:.+]] = icmp slt i32 %[[TMP17]], 8 // CHECK-NEXT: br label %[[LAND_END]] // CHECK-EMPTY: // CHECK-NEXT: [[LAND_END]]: diff --git a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_for_collapse.c b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_for_collapse.c index 1a2bd117bf98c..c44b2b3202694 100644 --- a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_for_collapse.c +++ b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_for_collapse.c @@ -114,12 +114,12 @@ double sind(double); // CHECK-NEXT: %[[TMP15:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_J]], align 4 // CHECK-NEXT: %[[TMP16:.+]] = load i32, i32* %[[DOTUNROLLED_IV_J7]], align 4 // CHECK-NEXT: %[[ADD21:.+]] = add nsw i32 %[[TMP16]], 2 -// CHECK-NEXT: %[[CMP22:.+]] = icmp sle i32 %[[TMP15]], %[[ADD21]] +// CHECK-NEXT: %[[CMP22:.+]] = icmp slt i32 %[[TMP15]], %[[ADD21]] // CHECK-NEXT: br i1 %[[CMP22]], label %[[LAND_RHS:.+]], 
label %[[LAND_END:.+]] // CHECK-EMPTY: // CHECK-NEXT: [[LAND_RHS]]: // CHECK-NEXT: %[[TMP17:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_J]], align 4 -// CHECK-NEXT: %[[CMP24:.+]] = icmp sle i32 %[[TMP17]], 8 +// CHECK-NEXT: %[[CMP24:.+]] = icmp slt i32 %[[TMP17]], 8 // CHECK-NEXT: br label %[[LAND_END]] // CHECK-EMPTY: // CHECK-NEXT: [[LAND_END]]: diff --git a/clang/test/OpenMP/unroll_codegen_for_collapse_outer.cpp b/clang/test/OpenMP/unroll_codegen_for_collapse_outer.cpp index cafda811d0d0c..693cbf851b991 100644 --- a/clang/test/OpenMP/unroll_codegen_for_collapse_outer.cpp +++ b/clang/test/OpenMP/unroll_codegen_for_collapse_outer.cpp @@ -176,14 +176,14 @@ extern "C" void body(...) {} // IR-NEXT: %[[TMP39:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_J]], align 4 // IR-NEXT: %[[TMP40:.+]] = load i32, i32* %[[DOTUNROLLED_IV_J23]], align 4 // IR-NEXT: %[[ADD50:.+]] = add i32 %[[TMP40]], 2 -// IR-NEXT: %[[CMP51:.+]] = icmp ule i32 %[[TMP39]], %[[ADD50]] +// IR-NEXT: %[[CMP51:.+]] = icmp ult i32 %[[TMP39]], %[[ADD50]] // IR-NEXT: br i1 %[[CMP51]], label %[[LAND_RHS:.+]], label %[[LAND_END:.+]] // IR-EMPTY: // IR-NEXT: [[LAND_RHS]]: // IR-NEXT: %[[TMP41:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_J]], align 4 // IR-NEXT: %[[TMP42:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_7]], align 4 // IR-NEXT: %[[ADD52:.+]] = add i32 %[[TMP42]], 1 -// IR-NEXT: %[[CMP53:.+]] = icmp ule i32 %[[TMP41]], %[[ADD52]] +// IR-NEXT: %[[CMP53:.+]] = icmp ult i32 %[[TMP41]], %[[ADD52]] // IR-NEXT: br label %[[LAND_END]] // IR-EMPTY: // IR-NEXT: [[LAND_END]]: diff --git a/clang/test/OpenMP/unroll_codegen_for_partial.cpp b/clang/test/OpenMP/unroll_codegen_for_partial.cpp index 759c436da5831..e1cb7a3db6b80 100644 --- a/clang/test/OpenMP/unroll_codegen_for_partial.cpp +++ b/clang/test/OpenMP/unroll_codegen_for_partial.cpp @@ -114,14 +114,14 @@ extern "C" void body(...) 
{} // IR-NEXT: %[[TMP21:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4 // IR-NEXT: %[[TMP22:.+]] = load i32, i32* %[[DOTUNROLLED_IV_I12]], align 4 // IR-NEXT: %[[ADD17:.+]] = add i32 %[[TMP22]], 2 -// IR-NEXT: %[[CMP18:.+]] = icmp ule i32 %[[TMP21]], %[[ADD17]] +// IR-NEXT: %[[CMP18:.+]] = icmp ult i32 %[[TMP21]], %[[ADD17]] // IR-NEXT: br i1 %[[CMP18]], label %[[LAND_RHS:.+]], label %[[LAND_END:.+]] // IR-EMPTY: // IR-NEXT: [[LAND_RHS]]: // IR-NEXT: %[[TMP23:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4 // IR-NEXT: %[[TMP24:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_3]], align 4 // IR-NEXT: %[[ADD19:.+]] = add i32 %[[TMP24]], 1 -// IR-NEXT: %[[CMP20:.+]] = icmp ule i32 %[[TMP23]], %[[ADD19]] +// IR-NEXT: %[[CMP20:.+]] = icmp ult i32 %[[TMP23]], %[[ADD19]] // IR-NEXT: br label %[[LAND_END]] // IR-EMPTY: // IR-NEXT: [[LAND_END]]: diff --git a/clang/test/OpenMP/unroll_codegen_parallel_for_factor.cpp b/clang/test/OpenMP/unroll_codegen_parallel_for_factor.cpp index 914b53252b9c1..1ad3108fcd17d 100644 --- a/clang/test/OpenMP/unroll_codegen_parallel_for_factor.cpp +++ b/clang/test/OpenMP/unroll_codegen_parallel_for_factor.cpp @@ -143,14 +143,14 @@ extern "C" void func(int start, int end, int step) { // IR-NEXT: %[[TMP26:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4 // IR-NEXT: %[[TMP27:.+]] = load i32, i32* %[[DOTUNROLLED_IV_I12]], align 4 // IR-NEXT: %[[ADD17:.+]] = add i32 %[[TMP27]], 7 -// IR-NEXT: %[[CMP18:.+]] = icmp ule i32 %[[TMP26]], %[[ADD17]] +// IR-NEXT: %[[CMP18:.+]] = icmp ult i32 %[[TMP26]], %[[ADD17]] // IR-NEXT: br i1 %[[CMP18]], label %[[LAND_RHS:.+]], label %[[LAND_END:.+]] // IR-EMPTY: // IR-NEXT: [[LAND_RHS]]: // IR-NEXT: %[[TMP28:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4 // IR-NEXT: %[[TMP29:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_3]], align 4 // IR-NEXT: %[[ADD19:.+]] = add i32 %[[TMP29]], 1 -// IR-NEXT: %[[CMP20:.+]] = icmp ule i32 %[[TMP28]], %[[ADD19]] +// IR-NEXT: %[[CMP20:.+]] = icmp ult i32 
%[[TMP28]], %[[ADD19]] // IR-NEXT: br label %[[LAND_END]] // IR-EMPTY: // IR-NEXT: [[LAND_END]]: diff --git a/clang/test/OpenMP/unroll_codegen_tile_for.cpp b/clang/test/OpenMP/unroll_codegen_tile_for.cpp index 2a31df2326e65..f7611f5b35e30 100644 --- a/clang/test/OpenMP/unroll_codegen_tile_for.cpp +++ b/clang/test/OpenMP/unroll_codegen_tile_for.cpp @@ -162,14 +162,14 @@ extern "C" void body(...) {} // IR-NEXT: %[[TMP31:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4 // IR-NEXT: %[[TMP32:.+]] = load i32, i32* %[[DOTUNROLLED_IV_I]], align 4 // IR-NEXT: %[[ADD36:.+]] = add i32 %[[TMP32]], 2 -// IR-NEXT: %[[CMP37:.+]] = icmp ule i32 %[[TMP31]], %[[ADD36]] +// IR-NEXT: %[[CMP37:.+]] = icmp ult i32 %[[TMP31]], %[[ADD36]] // IR-NEXT: br i1 %[[CMP37]], label %[[LAND_RHS:.+]], label %[[LAND_END:.+]] // IR-EMPTY: // IR-NEXT: [[LAND_RHS]]: // IR-NEXT: %[[TMP33:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4 // IR-NEXT: %[[TMP34:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_3]], align 4 // IR-NEXT: %[[ADD38:.+]] = add i32 %[[TMP34]], 1 -// IR-NEXT: %[[CMP39:.+]] = icmp ule i32 %[[TMP33]], %[[ADD38]] +// IR-NEXT: %[[CMP39:.+]] = icmp ult i32 %[[TMP33]], %[[ADD38]] // IR-NEXT: br label %[[LAND_END]] // IR-EMPTY: // IR-NEXT: [[LAND_END]]: diff --git a/clang/test/OpenMP/unroll_codegen_unroll_for.cpp b/clang/test/OpenMP/unroll_codegen_unroll_for.cpp index 5157d4f7ae5b8..ebb79d0c409aa 100644 --- a/clang/test/OpenMP/unroll_codegen_unroll_for.cpp +++ b/clang/test/OpenMP/unroll_codegen_unroll_for.cpp @@ -129,14 +129,14 @@ extern "C" void body(...) 
{} // IR-NEXT: %[[TMP24:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV__UNROLLED_IV_I]], align 4 // IR-NEXT: %[[TMP25:.+]] = load i32, i32* %[[DOTUNROLLED_IV__UNROLLED_IV_I18]], align 4 // IR-NEXT: %[[ADD23:.+]] = add i32 %[[TMP25]], 2 -// IR-NEXT: %[[CMP24:.+]] = icmp ule i32 %[[TMP24]], %[[ADD23]] +// IR-NEXT: %[[CMP24:.+]] = icmp ult i32 %[[TMP24]], %[[ADD23]] // IR-NEXT: br i1 %[[CMP24]], label %[[LAND_RHS:.+]], label %[[LAND_END:.+]] // IR-EMPTY: // IR-NEXT: [[LAND_RHS]]: // IR-NEXT: %[[TMP26:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV__UNROLLED_IV_I]], align 4 // IR-NEXT: %[[TMP27:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_8]], align 4 // IR-NEXT: %[[ADD25:.+]] = add i32 %[[TMP27]], 1 -// IR-NEXT: %[[CMP26:.+]] = icmp ule i32 %[[TMP26]], %[[ADD25]] +// IR-NEXT: %[[CMP26:.+]] = icmp ult i32 %[[TMP26]], %[[ADD25]] // IR-NEXT: br label %[[LAND_END]] // IR-EMPTY: // IR-NEXT: [[LAND_END]]: @@ -156,14 +156,14 @@ extern "C" void body(...) {} // IR-NEXT: %[[TMP31:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4 // IR-NEXT: %[[TMP32:.+]] = load i32, i32* %[[DOTUNROLLED_IV_I]], align 4 // IR-NEXT: %[[ADD30:.+]] = add i32 %[[TMP32]], 2 -// IR-NEXT: %[[CMP31:.+]] = icmp ule i32 %[[TMP31]], %[[ADD30]] +// IR-NEXT: %[[CMP31:.+]] = icmp ult i32 %[[TMP31]], %[[ADD30]] // IR-NEXT: br i1 %[[CMP31]], label %[[LAND_RHS32:.+]], label %[[LAND_END35:.+]] // IR-EMPTY: // IR-NEXT: [[LAND_RHS32]]: // IR-NEXT: %[[TMP33:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4 // IR-NEXT: %[[TMP34:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_3]], align 4 // IR-NEXT: %[[ADD33:.+]] = add i32 %[[TMP34]], 1 -// IR-NEXT: %[[CMP34:.+]] = icmp ule i32 %[[TMP33]], %[[ADD33]] +// IR-NEXT: %[[CMP34:.+]] = icmp ult i32 %[[TMP33]], %[[ADD33]] // IR-NEXT: br label %[[LAND_END35]] // IR-EMPTY: // IR-NEXT: [[LAND_END35]]: diff --git a/clang/test/OpenMP/unroll_codegen_unroll_for_attr.cpp b/clang/test/OpenMP/unroll_codegen_unroll_for_attr.cpp index add7ecaea01d5..06196259b6aca 100644 --- 
a/clang/test/OpenMP/unroll_codegen_unroll_for_attr.cpp +++ b/clang/test/OpenMP/unroll_codegen_unroll_for_attr.cpp @@ -129,14 +129,14 @@ extern "C" void body(...) {} // IR-NEXT: %[[TMP24:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV__UNROLLED_IV_I]], align 4 // IR-NEXT: %[[TMP25:.+]] = load i32, i32* %[[DOTUNROLLED_IV__UNROLLED_IV_I18]], align 4 // IR-NEXT: %[[ADD23:.+]] = add i32 %[[TMP25]], 2 -// IR-NEXT: %[[CMP24:.+]] = icmp ule i32 %[[TMP24]], %[[ADD23]] +// IR-NEXT: %[[CMP24:.+]] = icmp ult i32 %[[TMP24]], %[[ADD23]] // IR-NEXT: br i1 %[[CMP24]], label %[[LAND_RHS:.+]], label %[[LAND_END:.+]] // IR-EMPTY: // IR-NEXT: [[LAND_RHS]]: // IR-NEXT: %[[TMP26:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV__UNROLLED_IV_I]], align 4 // IR-NEXT: %[[TMP27:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_8]], align 4 // IR-NEXT: %[[ADD25:.+]] = add i32 %[[TMP27]], 1 -// IR-NEXT: %[[CMP26:.+]] = icmp ule i32 %[[TMP26]], %[[ADD25]] +// IR-NEXT: %[[CMP26:.+]] = icmp ult i32 %[[TMP26]], %[[ADD25]] // IR-NEXT: br label %[[LAND_END]] // IR-EMPTY: // IR-NEXT: [[LAND_END]]: @@ -156,14 +156,14 @@ extern "C" void body(...) 
{} // IR-NEXT: %[[TMP31:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4 // IR-NEXT: %[[TMP32:.+]] = load i32, i32* %[[DOTUNROLLED_IV_I]], align 4 // IR-NEXT: %[[ADD30:.+]] = add i32 %[[TMP32]], 2 -// IR-NEXT: %[[CMP31:.+]] = icmp ule i32 %[[TMP31]], %[[ADD30]] +// IR-NEXT: %[[CMP31:.+]] = icmp ult i32 %[[TMP31]], %[[ADD30]] // IR-NEXT: br i1 %[[CMP31]], label %[[LAND_RHS32:.+]], label %[[LAND_END35:.+]] // IR-EMPTY: // IR-NEXT: [[LAND_RHS32]]: // IR-NEXT: %[[TMP33:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4 // IR-NEXT: %[[TMP34:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_3]], align 4 // IR-NEXT: %[[ADD33:.+]] = add i32 %[[TMP34]], 1 -// IR-NEXT: %[[CMP34:.+]] = icmp ule i32 %[[TMP33]], %[[ADD33]] +// IR-NEXT: %[[CMP34:.+]] = icmp ult i32 %[[TMP33]], %[[ADD33]] // IR-NEXT: br label %[[LAND_END35]] // IR-EMPTY: // IR-NEXT: [[LAND_END35]]: _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits