https://github.com/ergawy created https://github.com/llvm/llvm-project/pull/174314
Follow-up on #171597: this PR hoists allocas in a parallel region to the entry block of the corresponding outlined function. #171597 introduced the main mechanism for this and enabled it for the GPU; this PR enables it for the CPU as well. From 0322a513996dd163dbb6f12b44690f1d0cedd4e4 Mon Sep 17 00:00:00 2001 From: ergawy <[email protected]> Date: Sun, 4 Jan 2026 01:00:48 -0600 Subject: [PATCH] [OpenMP][OMPIRBuilder] Hoist static parallel region allocas to the entry block on the CPU Follow-up on #171597, this PR hoists allocas in a parallel region to the entry block of its corresponding outlined function. This PR does this for the CPU while #171597 introduced the main mechanism to do so and did it for the GPU. --- flang/test/Integration/OpenMP/copyprivate.f90 | 2 +- flang/test/Integration/OpenMP/private-global.f90 | 7 ++++--- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 2 +- mlir/test/Target/LLVMIR/openmp-llvm.mlir | 9 ++++++--- .../Target/LLVMIR/openmp-parallel-reduction-init.mlir | 2 +- .../Target/LLVMIR/openmp-reduction-array-sections.mlir | 2 +- 6 files changed, 14 insertions(+), 10 deletions(-) diff --git a/flang/test/Integration/OpenMP/copyprivate.f90 b/flang/test/Integration/OpenMP/copyprivate.f90 index e0e4abe015438..43c8612d0a1da 100644 --- a/flang/test/Integration/OpenMP/copyprivate.f90 +++ b/flang/test/Integration/OpenMP/copyprivate.f90 @@ -37,9 +37,9 @@ !CHECK: %[[TID_ADDR:.*]] = alloca i32, align 4 !CHECK: %[[I:.*]] = alloca i32, align 4 !CHECK: %[[J:.*]] = alloca i32, align 4 +!CHECK: %[[DID_IT:.*]] = alloca i32 !CHECK: br label %[[OMP_REDUCTION_INIT:.*]] -!CHECK: %[[DID_IT:.*]] = alloca i32 !CHECK: store i32 0, ptr %[[DID_IT]] !CHECK: %[[THREAD_NUM1:.*]] = call i32 @__kmpc_global_thread_num(ptr @[[LOC:.*]]) !CHECK: %[[RET:.*]] = call i32 @__kmpc_single({{.*}}) diff --git a/flang/test/Integration/OpenMP/private-global.f90 b/flang/test/Integration/OpenMP/private-global.f90 index 8f8de8cdedd3b..978a8fa3c8205 100644 --- 
a/flang/test/Integration/OpenMP/private-global.f90 +++ b/flang/test/Integration/OpenMP/private-global.f90 @@ -22,15 +22,16 @@ program bug ! CHECK: store i32 %[[VAL_10]], ptr %[[VAL_9]], align 4 ! CHECK: %[[VAL_12:.*]] = load i32, ptr %[[VAL_9]], align 4 ! CHECK: %[[PRIV_BOX_ALLOC:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, align 8 -! ... -! check that we use the private copy of table for the assignment -! CHECK: omp.par.region1: ! CHECK: %[[ELEMENTAL_TMP:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, align 8 +! CHECK: %[[ELEMENTAL_TMP_2:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, align 8 ! CHECK: %[[TABLE_BOX_ADDR:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, align 8 ! CHECK: %[[BOXED_FIFTY:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8 }, align 8 ! CHECK: %[[FIFTY:.*]] = alloca i32, i64 1, align 4 ! CHECK: %[[INTERMEDIATE:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, align 8 ! CHECK: %[[TABLE_BOX_ADDR2:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, i64 1, align 8 +! ... +! check that we use the private copy of table for the assignment +! CHECK: omp.par.region1: ! CHECK: call void @llvm.memcpy.p0.p0.i32(ptr %[[INTERMEDIATE]], ptr %[[PRIV_BOX_ALLOC]], i32 {{4[48]}}, i1 false) ! CHECK: store i32 50, ptr %[[FIFTY]], align 4 ! 
CHECK: %[[FIFTY_BOX_VAL:.*]] = insertvalue { ptr, i64, i32, i8, i8, i8, i8 } { ptr undef, i64 4, i32 20240719, i8 0, i8 9, i8 0, i8 0 }, ptr %[[FIFTY]], 0 diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index f764b644edc69..5e4d4c7e49776 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -1732,7 +1732,7 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createParallel( hostParallelCallback(this, OutlinedFn, OuterFn, Ident, IfCondition, PrivTID, PrivTIDAddr, ToBeDeletedVec); }; - // TODO: fix-up allocations on the host as well? + OI.FixUpNonEntryAllocas = true; } OI.OuterAllocaBB = OuterAllocaBlock; diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir index 6011dc6604478..0b8a9765a4b87 100644 --- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir +++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir @@ -3513,12 +3513,16 @@ llvm.func @distribute_wsloop(%lb : i32, %ub : i32, %step : i32) { // CHECK: call void{{.*}}@__kmpc_fork_call({{.*}}, ptr @[[OUTLINED_PARALLEL:.*]], // CHECK: define internal void @[[OUTLINED_PARALLEL]] -// CHECK: distribute.alloca: +// CHECK: omp.par.entry: +// CHECK: %[[TID_LOCAL:.*]] = alloca i32, align 4 // CHECK: %[[LASTITER:.*]] = alloca i32 // CHECK: %[[LB:.*]] = alloca i32 // CHECK: %[[UB:.*]] = alloca i32 // CHECK: %[[STRIDE:.*]] = alloca i32 -// CHECK: br label %[[AFTER_ALLOCA:.*]] +// CHECK: %[[DIST_UB:.*]] = alloca i32 + +// CHECK: distribute.alloca: +// CHECK-NEXT: br label %[[AFTER_ALLOCA:.*]] // CHECK: [[AFTER_ALLOCA]]: // CHECK: br label %[[DISTRIBUTE_BODY:.*]] @@ -3539,7 +3543,6 @@ llvm.func @distribute_wsloop(%lb : i32, %ub : i32, %step : i32) { // CHECK: store i32 %[[TRIPCOUNT]], ptr %[[UB]] // CHECK: store i32 1, ptr %[[STRIDE]] // CHECK: %[[TID:.*]] = call i32 @__kmpc_global_thread_num({{.*}}) -// CHECK: %[[DIST_UB:.*]] = alloca i32 // CHECK: call void 
@__kmpc_dist_for_static_init_{{.*}}(ptr @{{.*}}, i32 %[[TID]], i32 34, ptr %[[LASTITER]], ptr %[[LB]], ptr %[[UB]], ptr %[[DIST_UB]], ptr %[[STRIDE]], i32 1, i32 0) // ----- diff --git a/mlir/test/Target/LLVMIR/openmp-parallel-reduction-init.mlir b/mlir/test/Target/LLVMIR/openmp-parallel-reduction-init.mlir index 7e90ba0f0d937..4a93ed39eb811 100644 --- a/mlir/test/Target/LLVMIR/openmp-parallel-reduction-init.mlir +++ b/mlir/test/Target/LLVMIR/openmp-parallel-reduction-init.mlir @@ -36,12 +36,12 @@ llvm.func @use_reduction() attributes {fir.bindc_name = "test"} { // CHECK: omp.par.entry: // CHECK: %[[RED_REGION_ALLOC:.*]] = alloca { ptr }, i64 1, align 8 +// CHECK: %[[PAR_REG_VAL:.*]] = alloca { ptr }, i64 1, align 8 // CHECK: omp.par.region: // CHECK: br label %omp.par.region1 // CHECK: omp.par.region1: -// CHECK: %[[PAR_REG_VAL:.*]] = alloca { ptr }, i64 1, align 8 // CHECK: br label %omp.reduction.init // CHECK: omp.reduction.init: diff --git a/mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir b/mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir index 13f52f054869e..bd3b77587b8a2 100644 --- a/mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir +++ b/mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir @@ -89,6 +89,7 @@ llvm.func @sectionsreduction_(%arg0: !llvm.ptr {fir.bindc_name = "x"}) attribute // CHECK: %[[VAL_20:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, i64 1, align 8 // CHECK: %[[VAL_21:.*]] = alloca ptr, align 8 // CHECK: %[[VAL_14:.*]] = alloca [1 x ptr], align 8 +// CHECK: %[[VAL_19:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, i64 1, align 8 // CHECK: br label %[[VAL_15:.*]] // CHECK: [[VAL_15]]: @@ -97,7 +98,6 @@ llvm.func @sectionsreduction_(%arg0: !llvm.ptr {fir.bindc_name = "x"}) attribute // CHECK: [[PAR_REG]]: ; preds = %[[VAL_15]] // CHECK: br label %[[VAL_18:.*]] // CHECK: omp.par.region1: ; preds = %[[PAR_REG]] -// CHECK: %[[VAL_19:.*]] = alloca { ptr, i64, i32, 
i8, i8, i8, i8, [1 x [3 x i64]] }, i64 1, align 8 // CHECK: br label %[[VAL_22:.*]] // CHECK: omp.reduction.init: ; preds = %[[VAL_16:.*]] _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
