Author: Johannes Doerfert Date: 2020-12-17T14:38:26-06:00 New Revision: 994bb6eb7d01db1d9461e54d17a63af2ba1af2c9
URL: https://github.com/llvm/llvm-project/commit/994bb6eb7d01db1d9461e54d17a63af2ba1af2c9 DIFF: https://github.com/llvm/llvm-project/commit/994bb6eb7d01db1d9461e54d17a63af2ba1af2c9.diff LOG: [OpenMP][NFC] Provide a new remark and documentation If a GPU function is externally reachable we give up trying to find the (unique) kernel it is called from. This can hinder optimizations. Emit a remark and explain mitigation strategies. Reviewed By: tianshilei1992 Differential Revision: https://reviews.llvm.org/D93439 Added: Modified: clang/test/OpenMP/remarks_parallel_in_multiple_target_state_machines.c clang/test/OpenMP/remarks_parallel_in_target_state_machine.c llvm/lib/Transforms/IPO/OpenMPOpt.cpp openmp/docs/remarks/OptimizationRemarks.rst Removed: ################################################################################ diff --git a/clang/test/OpenMP/remarks_parallel_in_multiple_target_state_machines.c b/clang/test/OpenMP/remarks_parallel_in_multiple_target_state_machines.c index 163f0b92468a..d5b5530fc361 100644 --- a/clang/test/OpenMP/remarks_parallel_in_multiple_target_state_machines.c +++ b/clang/test/OpenMP/remarks_parallel_in_multiple_target_state_machines.c @@ -4,7 +4,7 @@ // host-no-diagnostics -void bar1(void) { +void bar1(void) { // all-remark {{[OMP100] Potentially unknown OpenMP target region caller}} #pragma omp parallel // #0 // all-remark@#0 {{Found a parallel region that is called in a target region but not part of a combined target construct nor nesed inside a target construct without intermediate code. 
This can lead to excessive register usage for unrelated target regions in the same translation unit due to spurious call edges assumed by ptxas.}} // safe-remark@#0 {{Parallel region is not known to be called from a unique single target region, maybe the surrounding function has external linkage?; will not attempt to rewrite the state machine use.}} @@ -13,7 +13,7 @@ void bar1(void) { { } } -void bar2(void) { +void bar2(void) { // all-remark {{[OMP100] Potentially unknown OpenMP target region caller}} #pragma omp parallel // #1 // all-remark@#1 {{Found a parallel region that is called in a target region but not part of a combined target construct nor nesed inside a target construct without intermediate code. This can lead to excessive register usage for unrelated target regions in the same translation unit due to spurious call edges assumed by ptxas.}} // safe-remark@#1 {{Parallel region is not known to be called from a unique single target region, maybe the surrounding function has external linkage?; will not attempt to rewrite the state machine use.}} diff --git a/clang/test/OpenMP/remarks_parallel_in_target_state_machine.c b/clang/test/OpenMP/remarks_parallel_in_target_state_machine.c index 97507041e195..5747a05a13d3 100644 --- a/clang/test/OpenMP/remarks_parallel_in_target_state_machine.c +++ b/clang/test/OpenMP/remarks_parallel_in_target_state_machine.c @@ -4,7 +4,7 @@ // host-no-diagnostics -void bar(void) { +void bar(void) { // expected-remark {{[OMP100] Potentially unknown OpenMP target region caller}} #pragma omp parallel // #1 \ // expected-remark@#1 {{Found a parallel region that is called in a target region but not part of a combined target construct nor nesed inside a target construct without intermediate code. 
This can lead to excessive register usage for unrelated target regions in the same translation unit due to spurious call edges assumed by ptxas.}} \ // expected-remark@#1 {{Parallel region is not known to be called from a unique single target region, maybe the surrounding function has external linkage?; will not attempt to rewrite the state machine use.}} diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index 6053412bae84..5b4772028daf 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -1469,8 +1469,16 @@ Kernel OpenMPOpt::getUniqueKernelFor(Function &F) { } CachedKernel = nullptr; - if (!F.hasLocalLinkage()) + if (!F.hasLocalLinkage()) { + + // See https://openmp.llvm.org/remarks/OptimizationRemarks.html + auto Remark = [&](OptimizationRemark OR) { + return OR << "[OMP100] Potentially unknown OpenMP target region caller"; + }; + emitRemarkOnFunction(&F, "OMP100", Remark); + return nullptr; + } } auto GetUniqueKernelForUse = [&](const Use &U) -> Kernel { diff --git a/openmp/docs/remarks/OptimizationRemarks.rst b/openmp/docs/remarks/OptimizationRemarks.rst index fa7bf27b95ef..997a9a6d98c2 100644 --- a/openmp/docs/remarks/OptimizationRemarks.rst +++ b/openmp/docs/remarks/OptimizationRemarks.rst @@ -1,2 +1,30 @@ OpenMP Optimization Remarks =========================== + + +.. _omp100: +.. _omp_no_external_caller_in_target_region: + +`[OMP100]` Potentially unknown OpenMP target region caller +---------------------------------------------------------- + +A function remark that indicates the function, when compiled for a GPU, is +potentially called from outside the translation unit. Note that a remark is +only issued if we tried to perform an optimization which would require us to +know all callers on the GPU. 
+ +To facilitate OpenMP semantics on GPUs we provide a runtime mechanism through +which the code that makes up the body of a parallel region is shared with the +threads in the team. Generally we use the address of the outlined parallel +region to identify the code that needs to be executed. If we know all target +regions that reach the parallel region we can avoid this function pointer +passing scheme and often improve the register usage on the GPU. However, if a +parallel region on the GPU is in a function with external linkage we may not +know all callers statically. If there are outside callers within target +regions, this remark is to be ignored. If there are no such callers, users can +modify the linkage and thereby help optimization with a `static` or +`__attribute__((internal))` function annotation. If changing the linkage is +impossible, e.g., because there are outside callers on the host, one can split +the function into an externally visible interface which is not compiled for +the target and an internal implementation which is compiled for the target +and should be called from within the target region. _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits