Author: Johannes Doerfert Date: 2021-03-30T01:12:45-05:00 New Revision: 03cc8a1ba050f3138c30e7771e29a32fab22e957
URL: https://github.com/llvm/llvm-project/commit/03cc8a1ba050f3138c30e7771e29a32fab22e957 DIFF: https://github.com/llvm/llvm-project/commit/03cc8a1ba050f3138c30e7771e29a32fab22e957.diff LOG: [OpenMP][NFC] Move the `noinline` to the parallel entry point The `noinline` for non-SPMD parallel functions is probably not necessary but as long as we use it we should put it on the outermost parallel function, which is the wrapper, not the actual outlined function. Resolves PR49752 Reviewed By: JonChesterfield Differential Revision: https://reviews.llvm.org/D99506 Added: Modified: clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp clang/test/OpenMP/nvptx_parallel_codegen.cpp Removed: ################################################################################ diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index 0cef4f0a5684d..51b8670c18b87 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -2093,14 +2093,6 @@ void CGOpenMPRuntimeGPU::emitNonSPMDParallelCall( // Force inline this outlined function at its call site. Fn->setLinkage(llvm::GlobalValue::InternalLinkage); - // Ensure we do not inline the function. This is trivially true for the ones - // passed to __kmpc_fork_call but the ones calles in serialized regions - // could be inlined. This is not a perfect but it is closer to the invariant - // we want, namely, every data environment starts with a new function. - // TODO: We should pass the if condition to the runtime function and do the - // handling there. Much cleaner code. - cast<llvm::Function>(OutlinedFn)->addFnAttr(llvm::Attribute::NoInline); - Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, /*Name=*/".zero.addr"); CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); @@ -4216,6 +4208,15 @@ llvm::Function *CGOpenMPRuntimeGPU::createParallelDataSharingWrapper( auto *Fn = llvm::Function::Create( CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, Twine(OutlinedParallelFn->getName(), "_wrapper"), &CGM.getModule()); + + // Ensure we do not inline the function. This is trivially true for the ones + // passed to __kmpc_fork_call but the ones calles in serialized regions + // could be inlined. This is not a perfect but it is closer to the invariant + // we want, namely, every data environment starts with a new function. + // TODO: We should pass the if condition to the runtime function and do the + // handling there. Much cleaner code. + Fn->addFnAttr(llvm::Attribute::NoInline); + CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); Fn->setLinkage(llvm::GlobalValue::InternalLinkage); Fn->setDoesNotRecurse(); diff --git a/clang/test/OpenMP/nvptx_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_parallel_codegen.cpp index 593f7fa49bf45..f85d1d43336d8 100644 --- a/clang/test/OpenMP/nvptx_parallel_codegen.cpp +++ b/clang/test/OpenMP/nvptx_parallel_codegen.cpp @@ -4,7 +4,7 @@ // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns -fopenmp-cuda-parallel-target-regions | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 --check-prefix PAR // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 --check-prefix SEQ -// RUN: %clang_cc1 -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 --check-prefix SEQ +// RUN: %clang_cc1 -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns -disable-O0-optnone | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 --check-prefix SEQ // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns -fopenmp-cuda-parallel-target-regions | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 --check-prefix PAR // RUN: %clang_cc1 -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns -fopenmp-cuda-parallel-target-regions | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 --check-prefix PAR // expected-no-diagnostics @@ -318,7 +318,8 @@ int bar(int n){ // CHECK: [[EXIT]] // CHECK: ret void -// CHECK: define internal void [[PARALLEL_FN4]]( +// CHECK: noinline +// CHECK-NEXT: define internal void [[PARALLEL_FN4]]( // CHECK: [[A:%.+]] = alloca i[[SZ:32|64]], // CHECK: store i[[SZ]] 45, i[[SZ]]* %a, // CHECK: call void @__kmpc_barrier(%struct.ident_t* @{{.+}}, i32 %{{.+}}) @@ -326,6 +327,9 @@ int bar(int n){ // CHECK: declare void @__kmpc_barrier(%struct.ident_t*, i32) #[[#CONVERGENT:]] +// CHECK: Function Attrs: convergent noinline norecurse nounwind +// CHECK-NEXT: [[PARALLEL_FN4]]_wrapper + // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l58}}_worker() // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l58}}( // CHECK-32: [[A_ADDR:%.+]] = alloca i32, @@ -373,7 +377,6 @@ int bar(int n){ // CHECK: store i32 [[NEW_CC_VAL]], i32* [[CC]], // CHECK: br label - // CHECK: declare i32 @__kmpc_warp_active_thread_mask() #[[#CONVERGENT:]] // CHECK: declare void @__kmpc_syncwarp(i32) #[[#CONVERGENT:]] _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits