sfantao created this revision.
sfantao added reviewers: ABataev, rjmccall, hfinkel.
sfantao added a subscriber: cfe-commits.

All global variables that are not enclosed in a declare target region must be 
captured in the target region as local variables do. Currently, there is no 
support for declare target, so this patch adds support for capturing all the 
global variables used in a the target region.

This patch requires http://reviews.llvm.org/D11361.


http://reviews.llvm.org/D12262

Files:
  include/clang/Basic/OpenMPKinds.h
  include/clang/Sema/Sema.h
  lib/Basic/OpenMPKinds.cpp
  lib/Sema/SemaExpr.cpp
  lib/Sema/SemaOpenMP.cpp
  test/OpenMP/target_codegen_global_capture.cpp

Index: test/OpenMP/target_codegen_global_capture.cpp
===================================================================
--- /dev/null
+++ test/OpenMP/target_codegen_global_capture.cpp
@@ -0,0 +1,186 @@
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+
+// CHECK-DAG: [[GA:@.+]] = global double 1.000000e+00
+// CHECK-DAG: [[GB:@.+]] = global double 2.000000e+00
+// CHECK-DAG: [[GC:@.+]] = global double 3.000000e+00
+// CHECK-DAG: [[GD:@.+]] = global double 4.000000e+00
+// CHECK-DAG: [[FA:@.+]] = internal global float 5.000000e+00
+// CHECK-DAG: [[FB:@.+]] = internal global float 6.000000e+00
+// CHECK-DAG: [[FC:@.+]] = internal global float 7.000000e+00
+// CHECK-DAG: [[FD:@.+]] = internal global float 8.000000e+00
+// CHECK-DAG: [[BA:@.+]] = internal global float 9.000000e+00
+// CHECK-DAG: [[BB:@.+]] = internal global float 1.000000e+01
+// CHECK-DAG: [[BC:@.+]] = internal global float 1.100000e+01
+// CHECK-DAG: [[BD:@.+]] = internal global float 1.200000e+01
+double Ga = 1.0;
+double Gb = 2.0;
+double Gc = 3.0;
+double Gd = 4.0;
+
+// CHECK: define {{.*}} @{{.*}}foo{{.*}}(
+// CHECK-SAME: i16 {{[^,]*}}[[A:%[^,]+]],
+// CHECK-SAME: i16 {{[^,]*}}[[B:%[^,]+]],
+// CHECK-SAME: i16 {{[^,]*}}[[C:%[^,]+]],
+// CHECK-SAME: i16 {{[^,]*}}[[D:%[^,]+]])
+// CHECK: [[LA:%.+]] = alloca i16
+// CHECK: [[LB:%.+]] = alloca i16
+// CHECK: [[LC:%.+]] = alloca i16
+// CHECK: [[LD:%.+]] = alloca i16
+int foo(short a, short b, short c, short d){
+  static float Sa = 5.0;
+  static float Sb = 6.0;
+  static float Sc = 7.0;
+  static float Sd = 8.0;
+
+  // CHECK-DAG: [[REFB:%.+]] = bitcast i16* [[LB]] to i8*
+  // CHECK-DAG: store i8* [[REFB]], i8** [[GEPB:%.+]]
+  // CHECK-DAG: [[REFC:%.+]] = bitcast i16* [[LC]] to i8*
+  // CHECK-DAG: store i8* [[REFC]], i8** [[GEPC:%.+]]
+  // CHECK-DAG: [[REFD:%.+]] = bitcast i16* [[LD]] to i8*
+  // CHECK-DAG: store i8* [[REFD]], i8** [[GEPD:%.+]]
+  // CHECK-DAG: store i8* bitcast (double* [[GB]] to i8*), i8** [[GEPGB:%.+]]
+  // CHECK-DAG: store i8* bitcast (double* [[GC]] to i8*), i8** [[GEPGC:%.+]]
+  // CHECK-DAG: store i8* bitcast (double* [[GD]] to i8*), i8** [[GEPGD:%.+]]
+  // CHECK-DAG: store i8* bitcast (float* [[FB]] to i8*), i8** [[GEPFB:%.+]]
+  // CHECK-DAG: store i8* bitcast (float* [[FC]] to i8*), i8** [[GEPFC:%.+]]
+  // CHECK-DAG: store i8* bitcast (float* [[FD]] to i8*), i8** [[GEPFD:%.+]]
+  // CHECK-DAG: [[GEPB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}}
+  // CHECK-DAG: [[GEPC]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}}
+  // CHECK-DAG: [[GEPD]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}}
+  // CHECK-DAG: [[GEPGB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}}
+  // CHECK-DAG: [[GEPGC]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}}
+  // CHECK-DAG: [[GEPGD]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}}
+  // CHECK-DAG: [[GEPFB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}}
+  // CHECK-DAG: [[GEPFC]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}}
+  // CHECK-DAG: [[GEPFD]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}}
+  // CHECK: call i32 @__tgt_target
+  // CHECK: call void [[OFFLOADF:@.+]](
+  // Capture b, Gb, Sb, Gc, c, Sc, d, Gd, Sd
+  #pragma omp target if(Ga>0.0 && a>0 && Sa>0.0)
+  {
+    b += 1;
+    Gb += 1.0;
+    Sb += 1.0;
+
+
+    // CHECK: define internal void [[PARF:@.+]](i32* %{{.*}}, i32* %{{.*}},
+    // CHECK: define internal void [[OFFLOADF]](
+
+    // CHECK:     alloca [[CCAPTY:%.+]],
+    // CHECK:     [[CAP:%.+]] = alloca [[CAPTY:%.+]],
+    // CHECK:     getelementptr inbounds [[CAPTY]], [[CAPTY]]* [[CAP]], i32 0, i32
+    // CHECK:     getelementptr inbounds [[CAPTY]], [[CAPTY]]* [[CAP]], i32 0, i32
+    // CHECK:     getelementptr inbounds [[CAPTY]], [[CAPTY]]* [[CAP]], i32 0, i32
+    // CHECK-NOT: getelementptr inbounds [[CAPTY]], [[CAPTY]]* [[CAP]], i32 0, i32
+    // Capture d, Gd, Sd,
+    #pragma omp parallel if(Gc>0.0 && c>0 && Sc>0.0)
+    {
+      d += 1;
+      Gd += 1.0;
+      Sd += 1.0;
+    }
+  }
+  return a + b + c + d + (int)Sa + (int)Sb + (int)Sc + (int)Sd;
+}
+
+// CHECK: define {{.*}} @{{.*}}bar{{.*}}(
+// CHECK-SAME: i16 {{[^,]*}}[[A:%[^,]+]],
+// CHECK-SAME: i16 {{[^,]*}}[[B:%[^,]+]],
+// CHECK-SAME: i16 {{[^,]*}}[[C:%[^,]+]],
+// CHECK-SAME: i16 {{[^,]*}}[[D:%[^,]+]])
+// CHECK: [[LA:%.+]] = alloca i16
+// CHECK: [[LB:%.+]] = alloca i16
+// CHECK: [[LC:%.+]] = alloca i16
+// CHECK: [[LD:%.+]] = alloca i16
+int bar(short a, short b, short c, short d){
+  static float Sa = 9.0;
+  static float Sb = 10.0;
+  static float Sc = 11.0;
+  static float Sd = 12.0;
+
+  // CHECK-DAG: store i16* [[LA]], i16** [[GEPA:%.+]],
+  // CHECK-DAG: store i16* [[LB]], i16** [[GEPB:%.+]],
+  // CHECK-DAG: store i16* [[LC]], i16** [[GEPC:%.+]],
+  // CHECK-DAG: store i16* [[LD]], i16** [[GEPD:%.+]],
+  // CHECK-DAG: [[GEPA]] = getelementptr inbounds %{{.*}}, %{{.*}}* %{{.*}}, i32 0, i32 [[IDXA:[0-9]]]
+  // CHECK-DAG: [[GEPB]] = getelementptr inbounds %{{.*}}, %{{.*}}* %{{.*}}, i32 0, i32 [[IDXB:[0-9]]]
+  // CHECK-DAG: [[GEPC]] = getelementptr inbounds %{{.*}}, %{{.*}}* %{{.*}}, i32 0, i32 [[IDXC:[0-9]]]
+  // CHECK-DAG: [[GEPD]] = getelementptr inbounds %{{.*}}, %{{.*}}* %{{.*}}, i32 0, i32 [[IDXD:[0-9]]]
+  // CHECK: define internal void [[PARF:@.+]](i32* %{{.*}}, i32* %{{.*}},
+  // Capture a, b, c, d
+  #pragma omp parallel
+  {
+    // CHECK: alloca [[CAPTY:%.+]]*,
+    // CHECK-DAG: [[ADRA:%.+]] = getelementptr inbounds [[CAPTY]], [[CAPTY]]* %{{.*}}, i32 0, i32 [[IDXA]]
+    // CHECK-DAG: [[ADRB:%.+]] = getelementptr inbounds [[CAPTY]], [[CAPTY]]* %{{.*}}, i32 0, i32 [[IDXB]]
+    // CHECK-DAG: [[ADRC:%.+]] = getelementptr inbounds [[CAPTY]], [[CAPTY]]* %{{.*}}, i32 0, i32 [[IDXC]]
+    // CHECK-DAG: [[ADRD:%.+]] = getelementptr inbounds [[CAPTY]], [[CAPTY]]* %{{.*}}, i32 0, i32 [[IDXD]]
+    // CHECK-DAG: [[REFA:%.+]] = load i16*, i16** [[ADRA]],
+    // CHECK-DAG: [[REFB:%.+]] = load i16*, i16** [[ADRB]],
+    // CHECK-DAG: [[REFC:%.+]] = load i16*, i16** [[ADRC]],
+    // CHECK-DAG: [[REFD:%.+]] = load i16*, i16** [[ADRD]],
+
+    // CHECK: load float, float* [[BA]]
+
+    // CHECK-DAG: [[CSTB:%.+]] = bitcast i16* [[REFB]] to i8*
+    // CHECK-DAG: [[CSTC:%.+]] = bitcast i16* [[REFC]] to i8*
+    // CHECK-DAG: [[CSTD:%.+]] = bitcast i16* [[REFD]] to i8*
+    // CHECK-DAG: store i8* [[CSTB]], i8** [[GEPB:%.+]]
+    // CHECK-DAG: store i8* [[CSTC]], i8** [[GEPC:%.+]]
+    // CHECK-DAG: store i8* [[CSTD]], i8** [[GEPD:%.+]]
+    // CHECK-DAG: store i8* bitcast (double* [[GB]] to i8*), i8** [[GEPGB:%.+]]
+    // CHECK-DAG: store i8* bitcast (double* [[GC]] to i8*), i8** [[GEPGC:%.+]]
+    // CHECK-DAG: store i8* bitcast (double* [[GD]] to i8*), i8** [[GEPGD:%.+]]
+    // CHECK-DAG: store i8* bitcast (float* [[BB]] to i8*), i8** [[GEPBB:%.+]]
+    // CHECK-DAG: store i8* bitcast (float* [[BC]] to i8*), i8** [[GEPBC:%.+]]
+    // CHECK-DAG: store i8* bitcast (float* [[BD]] to i8*), i8** [[GEPBD:%.+]]
+
+    // CHECK-DAG: [[GEPB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}}
+    // CHECK-DAG: [[GEPC]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}}
+    // CHECK-DAG: [[GEPD]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}}
+    // CHECK-DAG: [[GEPGB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}}
+    // CHECK-DAG: [[GEPGC]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}}
+    // CHECK-DAG: [[GEPGD]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}}
+    // CHECK-DAG: [[GEPBB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}}
+    // CHECK-DAG: [[GEPBC]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}}
+    // CHECK-DAG: [[GEPBD]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}}
+    // CHECK: call i32 @__tgt_target
+    // CHECK: call void [[OFFLOADF:@.+]](
+    // Capture b, Gb, Sb, Gc, c, Sc, d, Gd, Sd
+    #pragma omp target if(Ga>0.0 && a>0 && Sa>0.0)
+    {
+      b += 1;
+      Gb += 1.0;
+      Sb += 1.0;
+
+      // CHECK: define internal void [[PARF:@.+]](i32* %{{.*}}, i32* %{{.*}},
+      // CHECK: define internal void [[OFFLOADF]](
+      // CHECK:     alloca [[CCAPTY:%.+]],
+      // CHECK:     [[CAP:%.+]] = alloca [[CAPTY:%.+]],
+      // CHECK:     getelementptr inbounds [[CAPTY]], [[CAPTY]]* [[CAP]], i32 0, i32
+      // CHECK:     getelementptr inbounds [[CAPTY]], [[CAPTY]]* [[CAP]], i32 0, i32
+      // CHECK:     getelementptr inbounds [[CAPTY]], [[CAPTY]]* [[CAP]], i32 0, i32
+      // CHECK-NOT: getelementptr inbounds [[CAPTY]], [[CAPTY]]* [[CAP]], i32 0, i32
+
+      // Capture d, Gd, Sd
+      #pragma omp parallel if(Gc>0.0 && c>0 && Sc>0.0)
+      {
+        d += 1;
+        Gd += 1.0;
+        Sd += 1.0;
+      }
+    }
+  }
+  return a + b + c + d + (int)Sa + (int)Sb + (int)Sc + (int)Sd;
+}
+
+#endif
Index: lib/Sema/SemaOpenMP.cpp
===================================================================
--- lib/Sema/SemaOpenMP.cpp
+++ lib/Sema/SemaOpenMP.cpp
@@ -190,6 +190,13 @@
   bool hasExplicitDSA(VarDecl *D,
                       const llvm::function_ref<bool(OpenMPClauseKind)> &CPred,
                       unsigned Level);
+
+  /// \brief Returns true if the directive at level \Level matches in the
+  /// specified \a DPred predicate.
+  bool hasExplicitDirective(
+      const llvm::function_ref<bool(OpenMPDirectiveKind)> &DPred,
+      unsigned Level);
+
   /// \brief Finds a directive which matches specified \a DPred predicate.
   template <class NamedDirectivesPredicate>
   bool hasDirective(NamedDirectivesPredicate DPred, bool FromParent);
@@ -635,6 +642,19 @@
          CPred(StartI->SharingMap[D].Attributes);
 }
 
+bool DSAStackTy::hasExplicitDirective(
+    const llvm::function_ref<bool(OpenMPDirectiveKind)> &DPred,
+    unsigned Level) {
+  if (isClauseParsingMode())
+    ++Level;
+  auto StartI = Stack.rbegin();
+  auto EndI = std::prev(Stack.rend());
+  if (std::distance(StartI, EndI) <= (int)Level)
+    return false;
+  std::advance(StartI, Level);
+  return DPred(StartI->Directive);
+}
+
 template <class NamedDirectivesPredicate>
 bool DSAStackTy::hasDirective(NamedDirectivesPredicate DPred, bool FromParent) {
   auto StartI = std::next(Stack.rbegin());
@@ -658,6 +678,29 @@
 bool Sema::IsOpenMPCapturedVar(VarDecl *VD) {
   assert(LangOpts.OpenMP && "OpenMP is not allowed");
   VD = VD->getCanonicalDecl();
+
+  // If we are attempting to capture a global variable in a directive with
+  // 'target' we return true so that this global is also mapped to the device.
+  //
+  // FIXME: If the declaration is enclosed in a 'declare target' directive,
+  // then it should not be captured. Therefore, an extra check has to be
+  // inserted here once support for 'declare target' is added.
+  //
+  if (!VD->hasLocalStorage()) {
+    if (DSAStack->getCurrentDirective() == OMPD_target &&
+        !DSAStack->isClauseParsingMode()) {
+      return true;
+    }
+    if (DSAStack->hasDirective(
+            [](OpenMPDirectiveKind K, const DeclarationNameInfo &DNI,
+               SourceLocation Loc) -> bool {
+              return isOpenMPTargetDirective(K);
+            },
+            false)) {
+      return true;
+    }
+  }
+
   if (DSAStack->getCurrentDirective() != OMPD_unknown &&
       (!DSAStack->isClauseParsingMode() ||
        DSAStack->getParentDirective() != OMPD_unknown)) {
@@ -682,6 +725,14 @@
       VD, [](OpenMPClauseKind K) -> bool { return K == OMPC_private; }, Level);
 }
 
+bool Sema::isOpenMPTargetCapturedVar(VarDecl *VD, unsigned Level) {
+  assert(LangOpts.OpenMP && "OpenMP is not allowed");
+  // Return true if the current level is no longer enclosed in a target region.
+
+  return !VD->hasLocalStorage() &&
+         DSAStack->hasExplicitDirective(isOpenMPTargetDirective, Level);
+}
+
 void Sema::DestroyDataSharingAttributesStack() { delete DSAStack; }
 
 void Sema::StartOpenMPDSABlock(OpenMPDirectiveKind DKind,
Index: lib/Sema/SemaExpr.cpp
===================================================================
--- lib/Sema/SemaExpr.cpp
+++ lib/Sema/SemaExpr.cpp
@@ -12833,10 +12833,18 @@
     if (getLangOpts().OpenMP) {
       if (auto *RSI = dyn_cast<CapturedRegionScopeInfo>(CSI)) {
         // OpenMP private variables should not be captured in outer scope, so
-        // just break here.
+        // just break here. Similarly, global variables that are captured in a
+        // target region should not be captured outside the scope of the region.
         if (RSI->CapRegionKind == CR_OpenMP) {
-          if (isOpenMPPrivateVar(Var, OpenMPLevel)) {
-            Nested = true;
+          auto isTargetCap = isOpenMPTargetCapturedVar(Var, OpenMPLevel);
+          // When we detect target captures we are looking from inside the
+          // target region, therefore we need to propagate the capture from the
+          // enclosing region. Therefore, the capture is not initially nested.
+          if (isTargetCap)
+            FunctionScopesIndex--;
+
+          if (isTargetCap || isOpenMPPrivateVar(Var, OpenMPLevel)) {
+            Nested = !isTargetCap;
             DeclRefType = DeclRefType.getUnqualifiedType();
             CaptureType = Context.getLValueReferenceType(DeclRefType);
             break;
Index: lib/Basic/OpenMPKinds.cpp
===================================================================
--- lib/Basic/OpenMPKinds.cpp
+++ lib/Basic/OpenMPKinds.cpp
@@ -387,6 +387,10 @@
          DKind == OMPD_parallel_sections; // TODO add next directives.
 }
 
+bool clang::isOpenMPTargetDirective(OpenMPDirectiveKind DKind) {
+  return DKind == OMPD_target; // TODO add next directives.
+}
+
 bool clang::isOpenMPTeamsDirective(OpenMPDirectiveKind DKind) {
   return DKind == OMPD_teams; // TODO add next directives.
 }
Index: include/clang/Sema/Sema.h
===================================================================
--- include/clang/Sema/Sema.h
+++ include/clang/Sema/Sema.h
@@ -7693,6 +7693,11 @@
   /// is performed.
   bool isOpenMPPrivateVar(VarDecl *VD, unsigned Level);
 
+  /// \brief Check if the specified variable is capturd  by 'target' directive.
+  /// \param Level Relative level of nested OpenMP construct for that the check
+  /// is performed.
+  bool isOpenMPTargetCapturedVar(VarDecl *VD, unsigned Level);
+
   ExprResult PerformOpenMPImplicitIntegerConversion(SourceLocation OpLoc,
                                                     Expr *Op);
   /// \brief Called on start of new data sharing attribute block.
Index: include/clang/Basic/OpenMPKinds.h
===================================================================
--- include/clang/Basic/OpenMPKinds.h
+++ include/clang/Basic/OpenMPKinds.h
@@ -101,6 +101,12 @@
 /// parallel', otherwise - false.
 bool isOpenMPParallelDirective(OpenMPDirectiveKind DKind);
 
+/// \brief Checks if the specified directive is a target-kind directive.
+/// \param DKind Specified directive.
+/// \return true - the directive is a target-like directive like 'omp target',
+/// otherwise - false.
+bool isOpenMPTargetDirective(OpenMPDirectiveKind DKind);
+
 /// \brief Checks if the specified directive is a teams-kind directive.
 /// \param DKind Specified directive.
 /// \return true - the directive is a teams-like directive like 'omp teams',
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to