jdoerfert updated this revision to Diff 312535.
jdoerfert added a comment.

Rebase on top of D93439 <https://reviews.llvm.org/D93439>


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D93079/new/

https://reviews.llvm.org/D93079

Files:
  clang/include/clang/Basic/AttrDocs.td
  llvm/lib/Transforms/IPO/OpenMPOpt.cpp
  llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll
  openmp/docs/remarks/OptimizationRemarks.rst

Index: openmp/docs/remarks/OptimizationRemarks.rst
===================================================================
--- openmp/docs/remarks/OptimizationRemarks.rst
+++ openmp/docs/remarks/OptimizationRemarks.rst
@@ -25,6 +25,13 @@
 modify the linkage and thereby help optimization with a `static` or
 `__attribute__((internal))` function annotation. If changing the linkage is
 impossible, e.g., because there are outside callers on the host, one can split
-the function into an external visible interface which is not compiled for
-the target and an internal implementation which is compiled for the target
-and should be called from within the target region.
+the function into an external visible interface which is not compiled for the
+target and an internal implementation which is compiled for the target and
+should be called from within the target region. Finally,  we provide an
+assumption for the target side which can be spelled either as
+`__attribute__((assume("omp_no_external_caller_in_target_region")))` or as
+OpenMP assumption, i.a., `#pragma omp begin assume
+ext_no_external_caller_in_target_region`. The assumption effectively allows the
+compiler to assume no caller outside the current translation unit will call the
+function from a target region, hence the function is not called from outside
+translation units on the device.
Index: llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll
===================================================================
--- llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll
+++ llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll
@@ -7,12 +7,18 @@
 ;     #pragma omp parallel
 ;     { }
 ; }
+; __attribute__((assume("no_external_callers")))
+; void baz(void) {
+;     #pragma omp parallel
+;     { }
+; }
 ; void foo(void) {
 ;   #pragma omp target teams
 ;   {
 ;     #pragma omp parallel
 ;     {}
 ;     bar();
+;     baz();
 ;     #pragma omp parallel
 ;     {}
 ;   }
@@ -23,13 +29,16 @@
 ; another kernel.
 
 ; CHECK-DAG: @__omp_outlined__1_wrapper.ID = private constant i8 undef
+; CHECK-DAG: @__omp_outlined__2b_wrapper.ID = private constant i8 undef
 ; CHECK-DAG: @__omp_outlined__3_wrapper.ID = private constant i8 undef
 
 ; CHECK-DAG:   icmp eq i8* %5, @__omp_outlined__1_wrapper.ID
+; CHECK-DAG:   icmp eq i8* %b6, @__omp_outlined__2b_wrapper.ID
 ; CHECK-DAG:   icmp eq i8* %7, @__omp_outlined__3_wrapper.ID
 
 ; CHECK-DAG:   call void @__kmpc_kernel_prepare_parallel(i8* @__omp_outlined__1_wrapper.ID)
-; CHECK-DAG:   call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void ()* @__omp_outlined__2_wrapper to i8*))
+; CHECK-DAG:   call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void ()* @__omp_outlined__2a_wrapper to i8*))
+; CHECK-DAG:   call void @__kmpc_kernel_prepare_parallel(i8* @__omp_outlined__2b_wrapper.ID)
 ; CHECK-DAG:   call void @__kmpc_kernel_prepare_parallel(i8* @__omp_outlined__3_wrapper.ID)
 
 
@@ -69,11 +78,20 @@
 
 .check.next:                                      ; preds = %.execute.parallel
   %6 = load i8*, i8** %work_fn, align 8
-  %work_match1 = icmp eq i8* %6, bitcast (void ()* @__omp_outlined__2_wrapper to i8*)
-  br i1 %work_match1, label %.execute.fn2, label %.check.next3
+  %work_match1 = icmp eq i8* %6, bitcast (void ()* @__omp_outlined__2a_wrapper to i8*)
+  br i1 %work_match1, label %.execute.fn2a, label %.check.next2
+
+.execute.fn2a:                                     ; preds = %.check.next
+  call void @__omp_outlined__2a_wrapper()
+  br label %.terminate.parallel
+
+.check.next2:                                      ; preds = %.execute.parallel
+  %b6 = load i8*, i8** %work_fn, align 8
+  %work_match1b = icmp eq i8* %b6, bitcast (void ()* @__omp_outlined__2b_wrapper to i8*)
+  br i1 %work_match1b, label %.execute.fn2b, label %.check.next3
 
-.execute.fn2:                                     ; preds = %.check.next
-  call void @__omp_outlined__2_wrapper()
+.execute.fn2b:                                     ; preds = %.check.next
+  call void @__omp_outlined__2b_wrapper()
   br label %.terminate.parallel
 
 .check.next3:                                     ; preds = %.check.next
@@ -111,6 +129,7 @@
 define internal void @__omp_outlined__() {
   call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void ()* @__omp_outlined__1_wrapper to i8*))
   call void @bar()
+  call void @baz()
   call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void ()* @__omp_outlined__3_wrapper to i8*))
   ret void
 }
@@ -125,11 +144,20 @@
 }
 
 define hidden void @bar() {
-  call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void ()* @__omp_outlined__2_wrapper to i8*))
+  call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void ()* @__omp_outlined__2a_wrapper to i8*))
+  ret void
+}
+
+define hidden void @baz() #0 {
+  call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void ()* @__omp_outlined__2b_wrapper to i8*))
+  ret void
+}
+
+define internal void @__omp_outlined__2a_wrapper() {
   ret void
 }
 
-define internal void @__omp_outlined__2_wrapper() {
+define internal void @__omp_outlined__2b_wrapper() {
   ret void
 }
 
@@ -147,6 +175,7 @@
 
 declare i32 @__kmpc_global_thread_num(%struct.ident_t* nocapture readnone)
 
+attributes #0 = { "llvm.assume"="abc,omp_no_external_caller_in_target_region,123" }
 
 !nvvm.annotations = !{!0}
 
Index: llvm/lib/Transforms/IPO/OpenMPOpt.cpp
===================================================================
--- llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -22,6 +22,7 @@
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Frontend/OpenMP/OMPConstants.h"
 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
+#include "llvm/IR/Assumptions.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Transforms/IPO.h"
@@ -1450,6 +1451,12 @@
   }
 };
 
+/// The "omp_no_external_caller_in_target_region" assumption guarantees that
+/// there are no external caller of a function which are inside an OpenMP
+/// target region.
+static KnownAssumptionString
+    NoExternalCallerInTargetRegion("omp_no_external_caller_in_target_region");
+
 Kernel OpenMPOpt::getUniqueKernelFor(Function &F) {
   if (!OMPInfoCache.ModuleSlice.count(&F))
     return nullptr;
@@ -1469,7 +1476,8 @@
     }
 
     CachedKernel = nullptr;
-    if (!F.hasLocalLinkage()) {
+    if (!F.hasLocalLinkage() &&
+        !hasAssumption(F, NoExternalCallerInTargetRegion)) {
 
       // See https://openmp.llvm.org/remarks/OptimizationRemarks.html
       auto Remark = [&](OptimizationRemark OR) {
Index: clang/include/clang/Basic/AttrDocs.td
===================================================================
--- clang/include/clang/Basic/AttrDocs.td
+++ clang/include/clang/Basic/AttrDocs.td
@@ -4018,9 +4018,13 @@
     "omp_no_openmp"
     "omp_no_openmp_routines"
     "omp_no_parallelism"
+    "omp_no_external_caller_in_target_region"
 
-The OpenMP standard defines the meaning of OpenMP assumptions ("omp_XYZ" is
-spelled "XYZ" in the `OpenMP 5.1 Standard`_).
+The OpenMP standard defines the meaning of the first three OpenMP assumptions
+("omp_XYZ" is spelled "XYZ" in the `OpenMP 5.1 Standard`_) while other are
+clang extensions spelled described `here
+<http://openmp.llvm.org/docs/remarks/OptimizationRemarks.html>`_ and spelled
+without the `omp` in the OpenMP assume directive syntax.
 
 .. _`OpenMP 5.1 Standard`: https://www.openmp.org/spec-html/5.1/openmpsu37.html#x56-560002.5.2
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
  • [PATCH] D93079: [OpenMP]... Johannes Doerfert via Phabricator via cfe-commits

Reply via email to