sfantao updated this revision to Diff 48839.
sfantao added a comment.

Rebase.


http://reviews.llvm.org/D17019

Files:
  lib/CodeGen/CGOpenMPRuntime.cpp
  lib/CodeGen/CGStmtOpenMP.cpp
  lib/CodeGen/CodeGenFunction.h
  test/OpenMP/teams_codegen.cpp

Index: test/OpenMP/teams_codegen.cpp
===================================================================
--- /dev/null
+++ test/OpenMP/teams_codegen.cpp
@@ -0,0 +1,211 @@
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+// Test host codegen.
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+#ifdef CK1
+
+int Gbla;
+long long Gblb;
+
+// CK1-LABEL: teams_argument_global_local
+int teams_argument_global_local(int a){
+  int comp = 1;
+
+  int la = 23;
+  float lc = 25.0;
+
+  // CK1: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+  // CK1: call void @{{.+}}(i{{64|32}} %{{.+}})
+  #pragma omp target
+  #pragma omp teams
+  {
+    ++comp;
+  }
+
+  // CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 0)
+  // CK1-DAG: [[NT]] = load i32, i32* [[NTA:%[^,]+]],
+  // CK1-64-DAG: [[NTA]] = bitcast i64* [[NTB:%[^,]+]] to i32*
+  // CK1-64-DAG: store i64 [[NTC:%[^,]+]], i64* [[NTB]],
+  // CK1-64-DAG: [[NTC]] = load i64, i64* [[NTD:%[^,]+]],
+  // CK1-64-DAG: [[NTE:%[^,]+]] = bitcast i64* [[NTD]] to i32*
+  // CK1-64-DAG: store i32 [[NTF:%[^,]+]], i32* [[NTE]],
+  // CK1-64-DAG: [[NTF]] = load i32, i32* {{%[^,]+}},
+
+
+  // CK1: call void @{{.+}}(i{{64|32}} %{{.+}})
+  #pragma omp target
+  #pragma omp teams num_teams(la)
+  {
+    ++comp;
+  }
+
+  // CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 [[NT:%[^,]+]])
+  // CK1-DAG: [[NT]] = load i32, i32* [[NTA:%[^,]+]],
+  // CK1-64-DAG: [[NTA]] = bitcast i64* [[NTB:%[^,]+]] to i32*
+  // CK1-64-DAG: store i64 [[NTC:%[^,]+]], i64* [[NTB]],
+  // CK1-64-DAG: [[NTC]] = load i64, i64* [[NTD:%[^,]+]],
+  // CK1-64-DAG: [[NTE:%[^,]+]] = bitcast i64* [[NTD]] to i32*
+  // CK1-64-DAG: store i32 [[NTF:%[^,]+]], i32* [[NTE]],
+  // CK1-64-DAG: [[NTF]] = load i32, i32* {{%[^,]+}},
+  // CK1: call void @{{.+}}(i{{64|32}} %{{.+}})
+  #pragma omp target
+  #pragma omp teams thread_limit(la)
+  {
+    ++comp;
+  }
+
+  // CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]])
+
+  // CK1-DAG: [[NT]] = add nsw i32 [[NTA:%[^,]+]], [[NTB:%[^,]+]]
+  // CK1-64-DAG: [[NTB]] = load i32, i32* %c{{.+}},
+  // CK1-64-DAG: [[NTA]] = load i32, i32* %c{{.+}},
+
+  // CK1-DAG: [[TL]] = trunc i64 [[TLA:%[^,]+]] to i32
+  // CK1-DAG: [[TLA]] = add nsw i64 [[TLB:%[^,]+]], [[TLC:%[^,]+]]
+  // CK1-DAG: [[TLC]] = fptosi float [[TLD:%[^,]+]] to i64
+  // CK1-DAG: [[TLD]] = load float, float* %{{.+}},
+  // CK1-DAG: [[TLB]] = load i64, i64* %{{.+}},
+
+  // CK1: call void @{{.+}}(i{{.+}} {{.+}}, i{{.+}} {{.+}}, i{{.+}} {{.+}}, i{{.+}} {{.+}}, i{{.+}} {{.+}})
+  #pragma omp target
+  #pragma omp teams num_teams(Gbla+a) thread_limit(Gblb+(long long)lc)
+  {
+    ++comp;
+  }
+
+  return comp;
+}
+
+#endif // CK1
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+#ifdef CK2
+
+// CK2-DAG: [[SSI:%.+]] = type { i32, float }
+// CK2-DAG: [[SSL:%.+]] = type { i64, float }
+template <typename T>
+struct SS{
+  T a;
+  float b;
+};
+
+SS<int> Gbla;
+SS<long long> Gblb;
+
+// CK2-LABEL: teams_template_arg
+int teams_template_arg(void) {
+  int comp = 1;
+
+  SS<int> la;
+  SS<long long> lb;
+
+  // CK2-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]])
+
+  // CK2-DAG: [[NT]] = load i32, i32* [[NTA:%[^,]+]],
+  // CK2-DAG: [[NTA]] = getelementptr inbounds [[SSI]], [[SSI]]* [[NTB:%[^,]+]], i32 0, i32 0
+  // CK2-DAG: [[NTB]] = load [[SSI]]*, [[SSI]]** %{{.+}},
+
+  // CK2-DAG: [[TL]] = trunc i64 [[TLA:%[^,]+]] to i32
+  // CK2-DAG: [[TLA]] = fptosi float [[TLB:%[^,]+]] to i64
+  // CK2-DAG: [[TLB]] = load float, float* [[TLC:%[^,]+]],
+  // CK2-DAG: [[TLC]] = getelementptr inbounds [[SSI]], [[SSI]]* %{{.+}}, i32 0, i32 1
+
+  // CK2: call void @{{.+}}({{.+}} {{.+}}, {{.+}} {{.+}}, {{.+}} {{.+}})
+  #pragma omp target
+  #pragma omp teams num_teams(Gbla.a) thread_limit((long long)la.b)
+  {
+    ++comp;
+  }
+
+  // CK2-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]])
+
+  // CK2-DAG: [[TL]] = trunc i64 [[TLD:%[^,]+]] to i32
+  // CK2-DAG: [[TLD]] = load i64, i64* [[TLA:%[^,]+]],
+  // CK2-DAG: [[TLA]] = getelementptr inbounds [[SSL]], [[SSL]]* [[TLB:%[^,]+]], i32 0, i32 0
+  // CK2-DAG: [[TLB]] = load [[SSL]]*, [[SSL]]** %{{.+}},
+
+  // CK2-DAG: [[NT]] = trunc i64 [[NTA:%[^,]+]] to i32
+  // CK2-DAG: [[NTA]] = fptosi float [[NTB:%[^,]+]] to i64
+  // CK2-DAG: [[NTB]] = load float, float* [[NTC:%[^,]+]],
+  // CK2-DAG: [[NTC]] = getelementptr inbounds [[SSL]], [[SSL]]* %{{.+}}, i32 0, i32 1
+
+  // CK2: call void @{{.+}}({{.+}} {{.+}}, {{.+}} {{.+}}, {{.+}} {{.+}})
+  #pragma omp target
+  #pragma omp teams num_teams((long long)lb.b) thread_limit(Gblb.a)
+  {
+    ++comp;
+  }
+  return comp;
+}
+#endif // CK2
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK3 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-64
+// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-64
+// RUN: %clang_cc1 -DCK3 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-32
+// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-32
+#ifdef CK3
+
+// CK3: [[SSI:%.+]] = type { i32, float }
+// CK3-LABEL: teams_template_struct
+
+template <typename T, int X, long long Y>
+struct SS{
+  T a;
+  float b;
+
+  int foo(void) {
+    int comp = 1;
+
+    // CK3-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 123)
+
+    // CK3-DAG: [[NT]] = load i32, i32* [[NTA:%[^,]+]],
+    // CK3-DAG: [[NTA]] = getelementptr inbounds [[SSI]], [[SSI]]* [[NTB:%[^,]+]], i32 0, i32 0
+    // CK3-DAG: [[NTB]] = load [[SSI]]*, [[SSI]]** %{{.+}},
+
+    // CK3: call void @{{.+}}({{.+}} {{.+}}, {{.+}} {{.+}})
+    #pragma omp target
+    #pragma omp teams num_teams(a) thread_limit(X)
+    {
+      ++comp;
+    }
+
+    // CK3-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 456, i32 [[TL:%[^,]+]])
+
+    // CK3-DAG: [[TL]] = add nsw i32 [[TLA:%[^,]+]], 123
+    // CK3-DAG: [[TLA]] = fptosi float [[TLB:%[^,]+]] to i32
+    // CK3-DAG: [[TLB]] = load float, float* [[TLC:%[^,]+]],
+    // CK3-DAG: [[TLC]] = getelementptr inbounds [[SSI]], [[SSI]]* [[TLD:%[^,]+]], i32 0, i32 1
+    // CK3-DAG: [[TLD]] = load [[SSI]]*, [[SSI]]** %{{.+}},
+
+    // CK3: call void @{{.+}}({{.+}} {{.+}}, {{.+}} {{.+}})
+    #pragma omp target
+    #pragma omp teams num_teams(Y) thread_limit((int)b+X)
+    {
+      ++comp;
+    }
+    return comp;
+  }
+};
+
+int teams_template_struct(void) {
+  SS<int, 123, 456> V;
+  return V.foo();
+
+}
+#endif // CK3
+#endif
Index: lib/CodeGen/CodeGenFunction.h
===================================================================
--- lib/CodeGen/CodeGenFunction.h
+++ lib/CodeGen/CodeGenFunction.h
@@ -2216,6 +2216,8 @@
   llvm::Function *EmitCapturedStmt(const CapturedStmt &S, CapturedRegionKind K);
   llvm::Function *GenerateCapturedStmtFunction(const CapturedStmt &S);
   Address GenerateCapturedStmtArgument(const CapturedStmt &S);
+  llvm::Function *GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
+                                                     QualType ReturnQTy);
   llvm::Function *GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S);
   void GenerateOpenMPCapturedVars(const CapturedStmt &S,
                                   SmallVectorImpl<llvm::Value *> &CapturedVars);
Index: lib/CodeGen/CGStmtOpenMP.cpp
===================================================================
--- lib/CodeGen/CGStmtOpenMP.cpp
+++ lib/CodeGen/CGStmtOpenMP.cpp
@@ -131,6 +131,12 @@
 
 llvm::Function *
 CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
+  return GenerateOpenMPCapturedStmtFunction(S, getContext().VoidTy);
+}
+
+llvm::Function *
+CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
+                                                    QualType ReturnQTy) {
   assert(
       CapturedStmtInfo &&
       "CapturedStmtInfo should be set when generating the captured function");
@@ -180,7 +186,7 @@
   // Create the function declaration.
   FunctionType::ExtInfo ExtInfo;
   const CGFunctionInfo &FuncInfo =
-      CGM.getTypes().arrangeFreeFunctionDeclaration(Ctx.VoidTy, Args, ExtInfo,
+      CGM.getTypes().arrangeFreeFunctionDeclaration(ReturnQTy, Args, ExtInfo,
                                                     /*IsVariadic=*/false);
   llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);
 
@@ -192,7 +198,7 @@
     F->addFnAttr(llvm::Attribute::NoUnwind);
 
   // Generate the function.
-  StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(),
+  StartFunction(CD, ReturnQTy, F, FuncInfo, Args, CD->getLocation(),
                 CD->getBody()->getLocStart());
   unsigned Cnt = CD->getContextParamPosition();
   I = S.captures().begin();
@@ -2680,8 +2686,12 @@
                                         CapturedVars);
 }
 
-void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &) {
-  llvm_unreachable("CodeGen for 'omp teams' is not supported yet.");
+void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
+  LexicalScope Scope(*this, S.getSourceRange());
+  const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt());
+
+  // FIXME: We should fork teams here instead of just emit the statement.
+  EmitStmt(CS.getCapturedStmt());
 }
 
 void CodeGenFunction::EmitOMPCancellationPointDirective(
Index: lib/CodeGen/CGOpenMPRuntime.cpp
===================================================================
--- lib/CodeGen/CGOpenMPRuntime.cpp
+++ lib/CodeGen/CGOpenMPRuntime.cpp
@@ -481,6 +481,10 @@
   // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
   // *arg_types);
   OMPRTL__tgt_target,
+  // Call to int32_t __tgt_target_teams(int32_t device_id, void *host_ptr,
+  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
+  // int32_t *arg_types, int32_t num_teams, int32_t thread_limit);
+  OMPRTL__tgt_target_teams,
   // Call to void __tgt_register_lib(__tgt_bin_desc *desc);
   OMPRTL__tgt_register_lib,
   // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
@@ -1153,6 +1157,24 @@
     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
     break;
   }
+  case OMPRTL__tgt_target_teams: {
+    // Build int32_t __tgt_target_teams(int32_t device_id, void *host_ptr,
+    // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
+    // int32_t *arg_types, int32_t num_teams, int32_t thread_limit);
+    llvm::Type *TypeParams[] = {CGM.Int32Ty,
+                                CGM.VoidPtrTy,
+                                CGM.Int32Ty,
+                                CGM.VoidPtrPtrTy,
+                                CGM.VoidPtrPtrTy,
+                                CGM.SizeTy->getPointerTo(),
+                                CGM.Int32Ty->getPointerTo(),
+                                CGM.Int32Ty,
+                                CGM.Int32Ty};
+    llvm::FunctionType *FnTy =
+        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
+    break;
+  }
   case OMPRTL__tgt_register_lib: {
     // Build void __tgt_register_lib(__tgt_bin_desc *desc);
     QualType ParamTy =
@@ -3972,6 +3994,136 @@
       DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID);
 }
 
+/// \brief Emit the num_teams clause of an enclosed teams directive at the
+/// target region scope. If there is no teams directive associated with the
+/// target directive, or if there is no num_teams clause associated with the
+/// enclosed teams directive, return nullptr.
+static llvm::Value *
+emitNumTeamsClauseForTargetDirective(CodeGenFunction &CGF,
+                                     const OMPExecutableDirective &D,
+                                     ArrayRef<llvm::Value *> KernelArgs) {
+
+  assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
+                                              "teams directive expected to be "
+                                              "emitted only for the host!");
+
+  // FIXME: For the moment we do not support combined directives with target and
+  // teams, so we do not expect to get any num_teams clause in the provided
+  // directive. Once we support that, this assertion can be replaced by the
+  // actual emission of the clause expression.
+  assert(D.getSingleClause<OMPNumTeamsClause>() == nullptr &&
+         "Not expecting clause in directive.");
+
+  // If the current target region has a teams region enclosed, we need to get
+  // the number of teams to pass to the runtime function call. This is done
+  // through a function that returns the value. This is required because the
+  // expression is captured in the enclosing target environment when the teams
+  // directive is not combined with target.
+
+  CodeGenModule &CGM = CGF.CGM;
+  const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
+
+  // FIXME: Accommodate other combined directives with teams when they become
+  // available.
+  if (auto *TeamsDir = dyn_cast<OMPTeamsDirective>(CS.getCapturedStmt())) {
+    if (auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) {
+      auto &&CodeGen = [NTE](CodeGenFunction &CGF) {
+        auto *V = CGF.EmitScalarExpr(NTE->getNumTeams());
+        CGF.Builder.CreateRet(
+            CGF.Builder.CreateIntCast(V, CGF.Int32Ty, /*isSigned=*/true));
+        CGF.EmitBlock(CGF.createBasicBlock());
+      };
+
+      llvm::Function *AuxFn;
+      {
+        CodeGenFunction CGF(CGM, true);
+        CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen,
+                                        ".omp_offload.get_num_teams");
+        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
+
+        AuxFn = CGF.GenerateOpenMPCapturedStmtFunction(
+            CS, CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/32,
+                                                       /*Signed=*/true));
+        AuxFn->addFnAttr(llvm::Attribute::AlwaysInline);
+      }
+      assert(AuxFn && "Invalid auxiliar function!");
+      return CGF.Builder.CreateCall(AuxFn, KernelArgs);
+    }
+
+    // If we have an enclosed teams directive but no num_teams clause we use
+    // the default value 0.
+    return CGF.Builder.getInt32(0);
+  }
+
+  // No teams associated with the directive.
+  return nullptr;
+}
+
+/// \brief Emit the thread_limit clause of an enclosed teams directive at the
+/// target region scope. If there is no teams directive associated with the
+/// target directive, or if there is no thread_limit clause associated with the
+/// enclosed teams directive, return nullptr.
+static llvm::Value *
+emitThreadLimitClauseForTargetDirective(CodeGenFunction &CGF,
+                                        const OMPExecutableDirective &D,
+                                        ArrayRef<llvm::Value *> KernelArgs) {
+
+  assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
+                                              "teams directive expected to be "
+                                              "emitted only for the host!");
+
+  // FIXME: For the moment we do not support combined directives with target and
+  // teams, so we do not expect to get any thread_limit clause in the provided
+  // directive. Once we support that, this assertion can be replaced by the
+  // actual emission of the clause expression.
+  assert(D.getSingleClause<OMPThreadLimitClause>() == nullptr &&
+         "Not expecting clause in directive.");
+
+  // If the current target region has a teams region enclosed, we need to get
+  // the number of teams to pass to the runtime function call. This is done
+  // through a function that returns the value. This is required because the
+  // expression is captured in the enclosing target environment when the teams
+  // directive is not combined with target.
+
+  CodeGenModule &CGM = CGF.CGM;
+  const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
+
+  // FIXME: Accommodate other combined directives with teams when they become
+  // available.
+  if (auto *TeamsDir = dyn_cast<OMPTeamsDirective>(CS.getCapturedStmt())) {
+    if (auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) {
+      auto &&CodeGen = [TLE](CodeGenFunction &CGF) {
+        auto *V = CGF.EmitScalarExpr(TLE->getThreadLimit());
+        CGF.Builder.CreateRet(
+            CGF.Builder.CreateIntCast(V, CGF.Int32Ty, /*isSigned=*/true));
+        CGF.EmitBlock(CGF.createBasicBlock());
+      };
+
+      llvm::Function *AuxFn;
+      {
+        CodeGenFunction CGF(CGM, true);
+        CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen,
+                                        ".omp_offload.get_thread_limit");
+        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
+
+        AuxFn = CGF.GenerateOpenMPCapturedStmtFunction(
+            CS, CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/32,
+                                                       /*Signed=*/true));
+        AuxFn->addFnAttr(llvm::Attribute::AlwaysInline);
+      }
+      assert(AuxFn && "Invalid auxiliar function!");
+      return CGF.Builder.CreateCall(AuxFn, KernelArgs);
+    }
+
+    // If we have an enclosed teams directive but no thread_limit clause we use
+    // the default value 0.
+    return CGF.Builder.getInt32(0);
+  }
+
+  // No teams associated with the directive.
+  return nullptr;
+}
+
 void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
                                      const OMPExecutableDirective &D,
                                      llvm::Value *OutlinedFn,
@@ -4100,7 +4252,7 @@
   // Fill up the pointer arrays and transfer execution to the device.
   auto &&ThenGen = [this, &Ctx, &BasePointers, &Pointers, &Sizes, &MapTypes,
                     hasVLACaptures, Device, OutlinedFnID, OffloadError,
-                    OffloadErrorQType](CodeGenFunction &CGF) {
+                    OffloadErrorQType, &D](CodeGenFunction &CGF) {
     unsigned PointerNumVal = BasePointers.size();
     llvm::Value *PointerNum = CGF.Builder.getInt32(PointerNumVal);
     llvm::Value *BasePointersArray;
@@ -4240,11 +4392,29 @@
     else
       DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
 
-    llvm::Value *OffloadingArgs[] = {
-        DeviceID,      OutlinedFnID, PointerNum,   BasePointersArray,
-        PointersArray, SizesArray,   MapTypesArray};
-    auto Return = CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target),
-                                      OffloadingArgs);
+    // Return value of the runtime offloading call.
+    llvm::Value *Return;
+
+    auto *NumTeams = emitNumTeamsClauseForTargetDirective(CGF, D, BasePointers);
+    auto *ThreadLimit =
+        emitThreadLimitClauseForTargetDirective(CGF, D, BasePointers);
+
+    if (NumTeams) {
+      assert(ThreadLimit && "Thread limit expression should be available along "
+                            "with number of teams.");
+      llvm::Value *OffloadingArgs[] = {
+          DeviceID,          OutlinedFnID,  PointerNum,
+          BasePointersArray, PointersArray, SizesArray,
+          MapTypesArray,     NumTeams,      ThreadLimit};
+      Return = CGF.EmitRuntimeCall(
+          createRuntimeFunction(OMPRTL__tgt_target_teams), OffloadingArgs);
+    } else {
+      llvm::Value *OffloadingArgs[] = {
+          DeviceID,      OutlinedFnID, PointerNum,   BasePointersArray,
+          PointersArray, SizesArray,   MapTypesArray};
+      Return = CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target),
+                                   OffloadingArgs);
+    }
 
     CGF.EmitStoreOfScalar(Return, OffloadError);
   };
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to