This revision was automatically updated to reflect the committed changes.
Closed by commit rG10fedd94b432: [OpenMP] Use the OpenMPIRBuilder for `omp 
parallel` (authored by jdoerfert).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D70290/new/

https://reviews.llvm.org/D70290

Files:
  clang/lib/CodeGen/CGStmtOpenMP.cpp
  clang/test/OpenMP/cancel_codegen.cpp
  clang/test/OpenMP/parallel_codegen.cpp
  llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
  llvm/lib/Frontend/OpenMP/OMPConstants.cpp
  llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
  llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp

Index: llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
===================================================================
--- llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -376,6 +376,10 @@
   Function *OutlinedFn = PrivAI->getFunction();
   EXPECT_NE(F, OutlinedFn);
   EXPECT_FALSE(verifyModule(*M));
+  EXPECT_TRUE(OutlinedFn->hasFnAttribute(Attribute::NoUnwind));
+  EXPECT_TRUE(OutlinedFn->hasFnAttribute(Attribute::NoRecurse));
+  EXPECT_TRUE(OutlinedFn->hasParamAttribute(0, Attribute::NoAlias));
+  EXPECT_TRUE(OutlinedFn->hasParamAttribute(1, Attribute::NoAlias));
 
   EXPECT_TRUE(OutlinedFn->hasInternalLinkage());
   EXPECT_EQ(OutlinedFn->arg_size(), 3U);
Index: llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
===================================================================
--- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -18,6 +18,7 @@
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/IR/CFG.h"
 #include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Error.h"
@@ -501,10 +502,21 @@
       dbgs() << " PBR: " << BB->getName() << "\n";
   });
 
+  // Add some known attributes to the outlined function.
   Function *OutlinedFn = Extractor.extractCodeRegion(CEAC);
+  OutlinedFn->addParamAttr(0, Attribute::NoAlias);
+  OutlinedFn->addParamAttr(1, Attribute::NoAlias);
+  OutlinedFn->addFnAttr(Attribute::NoUnwind);
+  OutlinedFn->addFnAttr(Attribute::NoRecurse);
+
   LLVM_DEBUG(dbgs() << "After      outlining: " << *UI->getFunction() << "\n");
   LLVM_DEBUG(dbgs() << "   Outlined function: " << *OutlinedFn << "\n");
 
+  // For compability with the clang CG we move the outlined function after the
+  // one with the parallel region.
+  OutlinedFn->removeFromParent();
+  M.getFunctionList().insertAfter(OuterFn->getIterator(), OutlinedFn);
+
   // Remove the artificial entry introduced by the extractor right away, we
   // made our own entry block after all.
   {
@@ -535,6 +547,23 @@
   RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end());
 
   FunctionCallee RTLFn = getOrCreateRuntimeFunction(OMPRTL___kmpc_fork_call);
+  if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
+    if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
+      llvm::LLVMContext &Ctx = F->getContext();
+      MDBuilder MDB(Ctx);
+      // Annotate the callback behavior of the __kmpc_fork_call:
+      //  - The callback callee is argument number 2 (microtask).
+      //  - The first two arguments of the callback callee are unknown (-1).
+      //  - All variadic arguments to the __kmpc_fork_call are passed to the
+      //    callback callee.
+      F->addMetadata(
+          llvm::LLVMContext::MD_callback,
+          *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
+                                      2, {-1, -1},
+                                      /* VarArgsArePassed */ true)}));
+    }
+  }
+
   Builder.CreateCall(RTLFn, RealArgs);
 
   LLVM_DEBUG(dbgs() << "With fork_call placed: "
Index: llvm/lib/Frontend/OpenMP/OMPConstants.cpp
===================================================================
--- llvm/lib/Frontend/OpenMP/OMPConstants.cpp
+++ llvm/lib/Frontend/OpenMP/OMPConstants.cpp
@@ -65,7 +65,7 @@
 #define OMP_TYPE(VarName, InitValue) VarName = InitValue;
 #define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...)                  \
   VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg);            \
-  VarName##Ptr = PointerType::getUnqual(T);
+  VarName##Ptr = PointerType::getUnqual(VarName);
 #define OMP_STRUCT_TYPE(VarName, StructName, ...)                              \
   T = M.getTypeByName(StructName);                                             \
   if (!T)                                                                      \
Index: llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
===================================================================
--- llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -205,6 +205,9 @@
 #define __OMP_RTL_ATTRS(Name, FnAttrSet, RetAttrSet, ArgAttrSets)              \
   OMP_RTL_ATTRS(OMPRTL_##Name, FnAttrSet, RetAttrSet, ArgAttrSets)
 
+__OMP_RTL_ATTRS(__kmpc_fork_call, AttributeSet(EnumAttr(NoUnwind)),
+                AttributeSet(), {})
+
 __OMP_RTL_ATTRS(__kmpc_global_thread_num, GetterAttrs, AttributeSet(), {})
 __OMP_RTL_ATTRS(omp_get_thread_num, GetterAttrs, AttributeSet(), {})
 
Index: clang/test/OpenMP/parallel_codegen.cpp
===================================================================
--- clang/test/OpenMP/parallel_codegen.cpp
+++ clang/test/OpenMP/parallel_codegen.cpp
@@ -1,22 +1,31 @@
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -emit-llvm %s -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -emit-llvm %s -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefixes=ALL,CHECK
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix=CHECK-DEBUG %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefixes=ALL-DEBUG,CHECK-DEBUG %s
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-enable-irbuilder -x c++ -emit-llvm %s -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefixes=ALL,IRBUILDER
+// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefixes=ALL-DEBUG,IRBUILDER-DEBUG %s
 
 // RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -emit-llvm %s -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
 // RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-enable-irbuilder -x c++ -emit-llvm %s -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-enable-irbuilder -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-enable-irbuilder -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
 // SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
-// CHECK-DAG: %struct.ident_t = type { i32, i32, i32, i32, i8* }
-// CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00"
-// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) }
+// ALL-DAG: %struct.ident_t = type { i32, i32, i32, i32, i8* }
+// ALL-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00"
+// ALL-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) }
 // CHECK-DEBUG-DAG: %struct.ident_t = type { i32, i32, i32, i32, i8* }
 // CHECK-DEBUG-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00"
 // CHECK-DEBUG-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) }
-// CHECK-DEBUG-DAG: [[LOC1:@.+]] = private unnamed_addr constant [{{.+}} x i8] c";{{.*}}parallel_codegen.cpp;main;[[@LINE+19]];1;;\00"
-// CHECK-DEBUG-DAG: [[LOC2:@.+]] = private unnamed_addr constant [{{.+}} x i8] c";{{.*}}parallel_codegen.cpp;tmain;[[@LINE+8]];1;;\00"
+// CHECK-DEBUG-DAG: [[LOC1:@.+]] = private unnamed_addr constant [{{.+}} x i8] c";{{.*}}parallel_codegen.cpp;main;[[@LINE+22]];1;;\00"
+// CHECK-DEBUG-DAG: [[LOC2:@.+]] = private unnamed_addr constant [{{.+}} x i8] c";{{.*}}parallel_codegen.cpp;tmain;[[@LINE+11]];1;;\00"
+// IRBUILDER-DEBUG-DAG: %struct.ident_t = type { i32, i32, i32, i32, i8* }
+// IRBUILDER-DEBUG-DAG: [[LOC1:@.+]] = private unnamed_addr constant [{{.+}} x i8] c";{{.*}}parallel_codegen.cpp;main;[[@LINE+19]];0;;\00"
+// IRBUILDER-DEBUG-DAG: [[LOC2:@.+]] = private unnamed_addr constant [{{.+}} x i8] c";{{.*}}parallel_codegen.cpp;tmain<char **>;[[@LINE+8]];0;;\00"
 
 template <class T>
 void foo(T argc) {}
@@ -39,65 +48,74 @@
   return tmain(argv);
 }
 
-// CHECK-LABEL: define {{[a-z\_\b]*[ ]?i32}} @main({{i32[ ]?[a-z]*}} %argc, i8** %argv)
-// CHECK: store i32 %argc, i32* [[ARGC_ADDR:%.+]],
-// CHECK: [[VLA:%.+]] = alloca i32, i{{[0-9]+}} [[VLA_SIZE:%[^,]+]],
+// ALL-LABEL: define {{[a-z\_\b]*[ ]?i32}} @main({{i32[ ]?[a-z]*}} %argc, i8** %argv)
+// ALL: store i32 %argc, i32* [[ARGC_ADDR:%.+]],
+// ALL: [[VLA:%.+]] = alloca i32, i{{[0-9]+}} [[VLA_SIZE:%[^,]+]],
 // CHECK:  call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[DEF_LOC_2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i{{[0-9]+}}, i32*)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i{{[0-9]+}} [[VLA_SIZE]], i32* [[VLA]])
-// CHECK-NEXT:  [[ARGV:%.+]] = load i8**, i8*** {{%[a-z0-9.]+}}
-// CHECK-NEXT:  [[RET:%.+]] = call {{[a-z\_\b]*[ ]?i32}} [[TMAIN:@.+tmain.+]](i8** [[ARGV]])
-// CHECK:       ret i32
-// CHECK-NEXT:  }
-// CHECK-DEBUG-LABEL: define i32 @main(i32 %argc, i8** %argv)
+// IRBUILDER:   call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[DEF_LOC_2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* [[VLA]])
+// ALL:  [[ARGV:%.+]] = load i8**, i8*** {{%[a-z0-9.]+}}
+// ALL-NEXT:  [[RET:%.+]] = call {{[a-z\_\b]*[ ]?i32}} [[TMAIN:@.+tmain.+]](i8** [[ARGV]])
+// ALL:       ret i32
+// ALL-NEXT:  }
+// ALL-DEBUG-LABEL: define i32 @main(i32 %argc, i8** %argv)
 // CHECK-DEBUG:       [[LOC_2_ADDR:%.+]] = alloca %struct.ident_t
 // CHECK-DEBUG:       [[KMPC_LOC_VOIDPTR:%.+]] = bitcast %struct.ident_t* [[LOC_2_ADDR]] to i8*
 // CHECK-DEBUG-NEXT:  [[KMPC_DEFAULT_LOC_VOIDPTR:%.+]] = bitcast %struct.ident_t* [[DEF_LOC_2]] to i8*
 // CHECK-DEBUG-NEXT:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[KMPC_LOC_VOIDPTR]], i8* align 8 [[KMPC_DEFAULT_LOC_VOIDPTR]], i64 24, i1 false)
-// CHECK-DEBUG:       store i32 %argc, i32* [[ARGC_ADDR:%.+]],
-// CHECK-DEBUG:       [[VLA:%.+]] = alloca i32, i64 [[VLA_SIZE:%[^,]+]],
+// ALL-DEBUG:       store i32 %argc, i32* [[ARGC_ADDR:%.+]],
+// ALL-DEBUG:       [[VLA:%.+]] = alloca i32, i64 [[VLA_SIZE:%[^,]+]],
 // CHECK-DEBUG:       [[KMPC_LOC_PSOURCE_REF:%.+]] = getelementptr inbounds %struct.ident_t, %struct.ident_t* [[LOC_2_ADDR]], i32 0, i32 4
 // CHECK-DEBUG-NEXT:  store i8* getelementptr inbounds ([{{.+}} x i8], [{{.+}} x i8]* [[LOC1]], i32 0, i32 0), i8** [[KMPC_LOC_PSOURCE_REF]]
 // CHECK-DEBUG:       call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[LOC_2_ADDR]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i32*)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i64 [[VLA_SIZE]], i32* [[VLA]])
-// CHECK-DEBUG-NEXT:  [[ARGV:%.+]] = load i8**, i8*** {{%[a-z0-9.]+}}
-// CHECK-DEBUG-NEXT:  [[RET:%.+]] = call i32 [[TMAIN:@.+tmain.+]](i8** [[ARGV]])
-// CHECK-DEBUG:       ret i32
-// CHECK-DEBUG-NEXT:  }
+// IRBUILDER-DEBUG:       call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.*}}, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* [[VLA]])
+// ALL-DEBUG:        [[ARGV:%.+]] = load i8**, i8*** {{%[a-z0-9.]+}}
+// ALL-DEBUG:        [[RET:%.+]] = call i32 [[TMAIN:@.+tmain.+]](i8** [[ARGV]])
+// ALL-DEBUG:        ret i32
+// ALL-DEBUG-NEXT:  }
 
-// CHECK:       define internal {{.*}}void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i{{[0-9]+}}{{.*}} [[VLA_SIZE:%.+]], i32* {{.+}} [[VLA_ADDR:%[^)]+]])
-// CHECK-SAME:       #[[FN_ATTRS:[0-9]+]]
+// CHECK:           define internal {{.*}}void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i{{[0-9]+}}{{.*}} [[VLA_SIZE:%.+]], i32* {{.+}} [[VLA_ADDR:%[^)]+]])
+// CHECK-SAME:      #[[FN_ATTRS:[0-9]+]]
+// IRBUILDER:       define internal {{.*}}void [[OMP_OUTLINED]](i32* noalias %{{.*}}, i32* noalias %{{.*}}, i32* [[VLA_REF:%[^)]+]])
+// IRBUILDER-SAME:  #[[FN_ATTRS:[0-9]+]]
 // CHECK:       store i32* [[VLA_ADDR]], i32** [[VLA_PTR_ADDR:%.+]],
 // CHECK:       [[VLA_REF:%.+]] = load i32*, i32** [[VLA_PTR_ADDR]]
-// CHECK:       [[VLA_ELEM_REF:%.+]] = getelementptr inbounds i32, i32* [[VLA_REF]], i{{[0-9]+}} 1
-// CHECK-NEXT:  [[VLA_ELEM:%.+]] = load i32, i32* [[VLA_ELEM_REF]]
+// ALL:       [[VLA_ELEM_REF:%.+]] = getelementptr inbounds i32, i32* [[VLA_REF]], i{{[0-9]+}} 1
+// ALL-NEXT:  [[VLA_ELEM:%.+]] = load i32, i32* [[VLA_ELEM_REF]]
 // CHECK-NEXT:  invoke {{.*}}void [[FOO:@.+foo.+]](i32{{[ ]?[a-z]*}} [[VLA_ELEM]])
+// IRBUILDER: call {{.*}}void [[FOO:@.+foo.+]](i32{{[ ]?[a-z]*}} [[VLA_ELEM]])
 // CHECK:       ret void
 // CHECK:       call {{.*}}void @{{.+terminate.*|abort}}(
 // CHECK-NEXT:  unreachable
 // CHECK-NEXT:  }
-// CHECK-DEBUG:       define internal void [[OMP_OUTLINED_DEBUG:@.+]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i64 [[VLA_SIZE:%.+]], i32* {{.+}} [[VLA_ADDR:%[^)]+]])
-// CHECK-DEBUG-SAME:  #[[FN_ATTRS:[0-9]+]]
+// CHECK-DEBUG:           define internal void [[OMP_OUTLINED_DEBUG:@.+]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i64 [[VLA_SIZE:%.+]], i32* {{.+}} [[VLA_ADDR:%[^)]+]])
+// CHECK-DEBUG-SAME:      #[[FN_ATTRS:[0-9]+]]
+// IRBUILDER-DEBUG:       define internal void [[OMP_OUTLINED_DEBUG:@.+]](i32* noalias %{{.*}}, i32* noalias %{{.*}}, i32* [[VLA_REF:%[^)]+]])
+// IRBUILDER-DEBUG-SAME:  #[[FN_ATTRS:[0-9]+]]
 // CHECK-DEBUG:       store i32* [[VLA_ADDR]], i32** [[VLA_PTR_ADDR:%.+]],
 // CHECK-DEBUG:       [[VLA_REF:%.+]] = load i32*, i32** [[VLA_PTR_ADDR]]
-// CHECK-DEBUG:       [[VLA_ELEM_REF:%.+]] = getelementptr inbounds i32, i32* [[VLA_REF]], i64 1
-// CHECK-DEBUG-NEXT:  [[VLA_ELEM:%.+]] = load i32, i32* [[VLA_ELEM_REF]]
+// ALL-DEBUG:       [[VLA_ELEM_REF:%.+]] = getelementptr inbounds i32, i32* [[VLA_REF]], i64 1
+// ALL-DEBUG-NEXT:  [[VLA_ELEM:%.+]] = load i32, i32* [[VLA_ELEM_REF]]
 // CHECK-DEBUG-NEXT:  invoke void [[FOO:@.+foo.+]](i32 [[VLA_ELEM]])
+// IRBUILDER-DEBUG-NEXT:  call void [[FOO:@.+foo.+]](i32 [[VLA_ELEM]])
 // CHECK-DEBUG:       ret void
 // CHECK-DEBUG:       call void @{{.+terminate.*|abort}}(
 // CHECK-DEBUG-NEXT:  unreachable
 // CHECK-DEBUG-NEXT:  }
 
-// CHECK-DAG: define linkonce_odr {{.*}}void [[FOO]]({{i32[ ]?[a-z]*}} %argc)
-// CHECK-DAG: declare !callback ![[cbid:[0-9]+]] {{.*}}void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...)
-// CHECK-DEBUG-DAG: define linkonce_odr void [[FOO]](i32 %argc)
-// CHECK-DEBUG-DAG: declare !callback ![[cbid:[0-9]+]] void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...)
+// ALL-DAG: define linkonce_odr {{.*}}void [[FOO]]({{i32[ ]?[a-z]*}} %argc)
+// ALL-DAG: declare !callback ![[cbid:[0-9]+]] {{.*}}void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...)
+// ALL-DEBUG-DAG: define linkonce_odr void [[FOO]](i32 %argc)
+// ALL-DEBUG-DAG: declare !callback ![[cbid:[0-9]+]] void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...)
 // CHECK-DEBUG-DAG:       define internal void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i64 [[VLA_SIZE:%.+]], i32* {{.+}} [[VLA_ADDR:%[^)]+]])
 // CHECK-DEBUG-DAG:       call void [[OMP_OUTLINED_DEBUG]]
 
-// CHECK:       define linkonce_odr {{[a-z\_\b]*[ ]?i32}} [[TMAIN]](i8** %argc)
-// CHECK:       store i8** %argc, i8*** [[ARGC_ADDR:%.+]],
+// ALL:       define linkonce_odr {{[a-z\_\b]*[ ]?i32}} [[TMAIN]](i8** %argc)
+// ALL:       store i8** %argc, i8*** [[ARGC_ADDR:%.+]],
 // CHECK:       call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[DEF_LOC_2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***, i{{64|32}})* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i8*** [[ARGC_ADDR]], i{{64|32}} %{{.+}})
-// CHECK-NEXT:  ret i32 0
-// CHECK-NEXT:  }
-// CHECK-DEBUG:       define linkonce_odr i32 [[TMAIN]](i8** %argc)
+// IRBUILDER:   call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[DEF_LOC_2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***, i64)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i8*** [[ARGC_ADDR]], i64 %{{.+}})
+// ALL:  ret i32 0
+// ALL-NEXT:  }
+// ALL-DEBUG:       define linkonce_odr i32 [[TMAIN]](i8** %argc)
 // CHECK-DEBUG-DAG:   [[LOC_2_ADDR:%.+]] = alloca %struct.ident_t
 // CHECK-DEBUG:       [[KMPC_LOC_VOIDPTR:%.+]] = bitcast %struct.ident_t* [[LOC_2_ADDR]] to i8*
 // CHECK-DEBUG-NEXT:  [[KMPC_DEFAULT_LOC_VOIDPTR:%.+]] = bitcast %struct.ident_t* [[DEF_LOC_2]] to i8*
@@ -106,35 +124,40 @@
 // CHECK-DEBUG:  [[KMPC_LOC_PSOURCE_REF:%.+]] = getelementptr inbounds %struct.ident_t, %struct.ident_t* [[LOC_2_ADDR]], i32 0, i32 4
 // CHECK-DEBUG-NEXT:  store i8* getelementptr inbounds ([{{.+}} x i8], [{{.+}} x i8]* [[LOC2]], i32 0, i32 0), i8** [[KMPC_LOC_PSOURCE_REF]]
 // CHECK-DEBUG-NEXT:  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[LOC_2_ADDR]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***, i64)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i8*** [[ARGC_ADDR]], i64 %{{.+}})
-// CHECK-DEBUG-NEXT:  ret i32 0
-// CHECK-DEBUG-NEXT:  }
+// IRBUILDER-DEBUG:  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.*}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***, i64)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i8*** [[ARGC_ADDR]], i64 %{{.+}})
+// ALL-DEBUG:  ret i32 0
+// ALL-DEBUG-NEXT:  }
 
 // CHECK:       define internal {{.*}}void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i8*** dereferenceable({{4|8}}) %argc, i{{64|32}}{{.*}} %{{.+}})
+// IRBUILDER:   define internal {{.*}}void [[OMP_OUTLINED]](i32* noalias %{{.*}}, i32* noalias %{{.*}}, i8*** [[ARGC_REF:%.*]], i{{64|32}}{{.*}} %{{.+}})
 // CHECK:       store i8*** %argc, i8**** [[ARGC_PTR_ADDR:%.+]],
 // CHECK:       [[ARGC_REF:%.+]] = load i8***, i8**** [[ARGC_PTR_ADDR]]
-// CHECK:       [[ARGC:%.+]] = load i8**, i8*** [[ARGC_REF]]
+// ALL:         [[ARGC:%.+]] = load i8**, i8*** [[ARGC_REF]]
 // CHECK-NEXT:  invoke {{.*}}void [[FOO1:@.+foo.+]](i8** [[ARGC]])
+// IRBUILDER-NEXT: call {{.*}}void [[FOO1:@.+foo.+]](i8** [[ARGC]])
 // CHECK:       ret void
 // CHECK:       call {{.*}}void @{{.+terminate.*|abort}}(
 // CHECK-NEXT:  unreachable
 // CHECK-NEXT:  }
 // CHECK-DEBUG:       define internal void [[OMP_OUTLINED_DEBUG:@.+]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i8*** dereferenceable({{4|8}}) %argc, i64 %{{.+}})
+// IRBUILDER-DEBUG:   define internal void [[OMP_OUTLINED_DEBUG:@.+]](i32* noalias %{{.*}}, i32* noalias %{{.*}}, i8*** [[ARGC_REF:%.*]], i64 %{{.+}})
 // CHECK-DEBUG:       store i8*** %argc, i8**** [[ARGC_PTR_ADDR:%.+]],
 // CHECK-DEBUG:       [[ARGC_REF:%.+]] = load i8***, i8**** [[ARGC_PTR_ADDR]]
-// CHECK-DEBUG:       [[ARGC:%.+]] = load i8**, i8*** [[ARGC_REF]]
+// ALL-DEBUG:         [[ARGC:%.+]] = load i8**, i8*** [[ARGC_REF]]
 // CHECK-DEBUG-NEXT:  invoke void [[FOO1:@.+foo.+]](i8** [[ARGC]])
+// IRBUILDER-DEBUG-NEXT:  call void [[FOO1:@.+foo.+]](i8** [[ARGC]])
 // CHECK-DEBUG:       ret void
 // CHECK-DEBUG:       call void @{{.+terminate.*|abort}}(
 // CHECK-DEBUG-NEXT:  unreachable
 // CHECK-DEBUG-NEXT:  }
 
-// CHECK: define linkonce_odr {{.*}}void [[FOO1]](i8** %argc)
+// ALL: define linkonce_odr {{.*}}void [[FOO1]](i8** %argc)
 // CHECK-DEBUG-DAG: define linkonce_odr void [[FOO1]](i8** %argc)
 // CHECK-DEBUG-DAG: define internal void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i8*** dereferenceable({{4|8}}) %argc, i64 %{{.+}})
 // CHECK-DEBUG-DAG: call void [[OMP_OUTLINED_DEBUG]]({{[^)]+}}){{[^,]*}}, !dbg
 
-// CHECK: attributes #[[FN_ATTRS]] = {{.+}} nounwind
-// CHECK-DEBUG: attributes #[[FN_ATTRS]] = {{.+}} nounwind
-// CHECK: ![[cbid]] = !{![[cbidb:[0-9]+]]}
-// CHECK: ![[cbidb]] = !{i64 2, i64 -1, i64 -1, i1 true}
+// ALL: attributes #[[FN_ATTRS]] = {{.+}} nounwind
+// ALL-DEBUG: attributes #[[FN_ATTRS]] = {{.+}} nounwind
+// ALL: ![[cbid]] = !{![[cbidb:[0-9]+]]}
+// ALL: ![[cbidb]] = !{i64 2, i64 -1, i64 -1, i1 true}
 #endif
Index: clang/test/OpenMP/cancel_codegen.cpp
===================================================================
--- clang/test/OpenMP/cancel_codegen.cpp
+++ clang/test/OpenMP/cancel_codegen.cpp
@@ -1,10 +1,10 @@
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,CHECK
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple x86_64-apple-darwin13.4.0 -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - | FileCheck %s --check-prefixes=ALL,CHECK
 
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-enable-irbuilder -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-enable-irbuilder -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,IRBUILDER
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-enable-irbuilder -x c++ -std=c++11 -triple x86_64-apple-darwin13.4.0 -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-enable-irbuilder -std=c++11 -include-pch %t -fsyntax-only -verify %s -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-enable-irbuilder -std=c++11 -include-pch %t -fsyntax-only -verify %s -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - | FileCheck %s --check-prefixes=ALL,IRBUILDER
 
 // RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - %s | FileCheck --check-prefix SIMD-ONLY0 %s
 // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple x86_64-apple-darwin13.4.0 -emit-pch -o %t %s
@@ -16,7 +16,7 @@
 
 float flag;
 int main (int argc, char **argv) {
-// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(
+// ALL: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(
 #pragma omp parallel
 {
 #pragma omp cancel parallel if(flag)
@@ -24,15 +24,15 @@
 #pragma omp barrier
   argv[0][0] += argc;
 }
-// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
+// ALL: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
 #pragma omp sections
 {
 #pragma omp cancel sections
 }
-// CHECK: call void @__kmpc_for_static_init_4(
-// CHECK: call i32 @__kmpc_cancel(
-// CHECK: call void @__kmpc_for_static_fini(
-// CHECK: call void @__kmpc_barrier(%struct.ident_t*
+// ALL: call void @__kmpc_for_static_init_4(
+// ALL: call i32 @__kmpc_cancel(
+// ALL: call void @__kmpc_for_static_fini(
+// ALL: call void @__kmpc_barrier(%struct.ident_t*
 #pragma omp sections
 {
 #pragma omp cancel sections
@@ -41,53 +41,53 @@
 #pragma omp cancel sections
   }
 }
-// CHECK: call void @__kmpc_for_static_init_4(
-// CHECK: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 [[GTID]], i32 3)
-// CHECK: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
-// CHECK: br i1 [[CMP]], label %[[EXIT:[^,].+]], label %[[CONTINUE:.+]]
-// CHECK: [[EXIT]]
-// CHECK: br label
-// CHECK: [[CONTINUE]]
-// CHECK: br label
-// CHECK: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 [[GTID]], i32 3)
-// CHECK: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
-// CHECK: br i1 [[CMP]], label %[[EXIT:[^,].+]], label %[[CONTINUE:.+]]
-// CHECK: [[EXIT]]
-// CHECK: br label
-// CHECK: [[CONTINUE]]
-// CHECK: br label
-// CHECK: call void @__kmpc_for_static_fini(
+// ALL: call void @__kmpc_for_static_init_4(
+// ALL: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 [[GTID]], i32 3)
+// ALL: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
+// ALL: br i1 [[CMP]], label %[[EXIT:[^,].+]], label %[[CONTINUE:.+]]
+// ALL: [[EXIT]]
+// ALL: br label
+// ALL: [[CONTINUE]]
+// ALL: br label
+// ALL: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 [[GTID]], i32 3)
+// ALL: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
+// ALL: br i1 [[CMP]], label %[[EXIT:[^,].+]], label %[[CONTINUE:.+]]
+// ALL: [[EXIT]]
+// ALL: br label
+// ALL: [[CONTINUE]]
+// ALL: br label
+// ALL: call void @__kmpc_for_static_fini(
 #pragma omp for
 for (int i = 0; i < argc; ++i) {
 #pragma omp cancel for if(cancel: flag)
 }
-// CHECK: call void @__kmpc_for_static_init_4(
-// CHECK: [[FLAG:%.+]] = load float, float* @{{.+}},
-// CHECK: [[BOOL:%.+]] = fcmp une float [[FLAG]], 0.000000e+00
-// CHECK: br i1 [[BOOL]], label %[[THEN:[^,]+]], label %[[ELSE:[^,]+]]
-// CHECK: [[THEN]]
-// CHECK: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 [[GTID]], i32 2)
-// CHECK: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
-// CHECK: br i1 [[CMP]], label %[[EXIT:[^,].+]], label %[[CONTINUE:.+]]
-// CHECK: [[EXIT]]
-// CHECK: br label
-// CHECK: [[CONTINUE]]
-// CHECK: br label
-// CHECK: [[ELSE]]
-// CHECK: br label
-// CHECK: call void @__kmpc_for_static_fini(
-// CHECK: call void @__kmpc_barrier(%struct.ident_t*
+// ALL: call void @__kmpc_for_static_init_4(
+// ALL: [[FLAG:%.+]] = load float, float* @{{.+}},
+// ALL: [[BOOL:%.+]] = fcmp une float [[FLAG]], 0.000000e+00
+// ALL: br i1 [[BOOL]], label %[[THEN:[^,]+]], label %[[ELSE:[^,]+]]
+// ALL: [[THEN]]
+// ALL: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 [[GTID]], i32 2)
+// ALL: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
+// ALL: br i1 [[CMP]], label %[[EXIT:[^,].+]], label %[[CONTINUE:.+]]
+// ALL: [[EXIT]]
+// ALL: br label
+// ALL: [[CONTINUE]]
+// ALL: br label
+// ALL: [[ELSE]]
+// ALL: br label
+// ALL: call void @__kmpc_for_static_fini(
+// ALL: call void @__kmpc_barrier(%struct.ident_t*
 #pragma omp task
 {
 #pragma omp cancel taskgroup
 }
-// CHECK: call i8* @__kmpc_omp_task_alloc(
-// CHECK: call i32 @__kmpc_omp_task(
+// ALL: call i8* @__kmpc_omp_task_alloc(
+// ALL: call i32 @__kmpc_omp_task(
 #pragma omp parallel sections
 {
 #pragma omp cancel sections
 }
-// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
+// ALL: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
 #pragma omp parallel sections
 {
 #pragma omp cancel sections
@@ -96,14 +96,14 @@
 #pragma omp cancel sections
   }
 }
-// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
+// ALL: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
 int r = 0;
 #pragma omp parallel for reduction(+: r)
 for (int i = 0; i < argc; ++i) {
 #pragma omp cancel for
   r += i;
 }
-// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
+// ALL: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
   return argc;
 }
 
@@ -173,4 +173,29 @@
 // CHECK: call void @__kmpc_for_static_fini(
 // CHECK: ret void
 
+// IRBUILDER: define internal void @main
+
+// IRBUILDER: [[RETURN:omp.par.exit[^:]*]]
+// IRBUILDER-NEXT: ret void
+// IRBUILDER: [[FLAG:%.+]] = load float, float* @{{.+}},
+
+// IRBUILDER: [[BOOL:%.+]] = fcmp une float [[FLAG]], 0.000000e+00
+// IRBUILDER: br i1 [[BOOL]], label %[[THEN:[^,]+]], label %[[ELSE:[^,]+]]
+// IRBUILDER: [[ELSE]]
+// IRBUILDER-NEXT:   br label %[[ELSE2:.*]]
+// IRBUILDER: [[ELSE2]]
+// The barrier directive should now call __kmpc_cancel_barrier
+// IRBUILDER: call i32 @__kmpc_cancel_barrier(%struct.ident_t*
+// IRBUILDER: br label
+// IRBUILDER: [[THEN]]
+// IRBUILDER: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 {{[^,]+}}, i32 1)
+// IRBUILDER: [[CMP:%.+]] = icmp eq i32 [[RES]], 0
+// IRBUILDER: br i1 [[CMP]], label %[[CONTINUE:[^,].+]], label %[[EXIT:.+]]
+// IRBUILDER: [[EXIT]]
+// IRBUILDER: br label %[[EXIT2:.+]]
+// IRBUILDER: [[EXIT2]]
+// IRBUILDER: br label %[[RETURN]]
+// IRBUILDER: [[CONTINUE]]
+// IRBUILDER: br label %[[ELSE:.+]]
+
 #endif
Index: clang/lib/CodeGen/CGStmtOpenMP.cpp
===================================================================
--- clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -21,6 +21,7 @@
 #include "clang/AST/Stmt.h"
 #include "clang/AST/StmtOpenMP.h"
 #include "clang/Basic/PrettyStackTrace.h"
+#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
 using namespace clang;
 using namespace CodeGen;
 using namespace llvm::omp;
@@ -1318,6 +1319,87 @@
                                      llvm::SmallVectorImpl<llvm::Value *> &) {}
 
 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
+
+  if (llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder()) {
+    // Check if we have any if clause associated with the directive.
+    llvm::Value *IfCond = nullptr;
+    if (const auto *C = S.getSingleClause<OMPIfClause>())
+      IfCond = EmitScalarExpr(C->getCondition(),
+                              /*IgnoreResultAssign=*/true);
+
+    llvm::Value *NumThreads = nullptr;
+    if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>())
+      NumThreads = EmitScalarExpr(NumThreadsClause->getNumThreads(),
+                                  /*IgnoreResultAssign=*/true);
+
+    ProcBindKind ProcBind = OMP_PROC_BIND_default;
+    if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>())
+      ProcBind = ProcBindClause->getProcBindKind();
+
+    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
+
+    // The cleanup callback that finalizes all variabels at the given location,
+    // thus calls destructors etc.
+    auto FiniCB = [this](InsertPointTy IP) {
+      CGBuilderTy::InsertPointGuard IPG(Builder);
+      assert(IP.getBlock()->end() != IP.getPoint() &&
+             "OpenMP IR Builder should cause terminated block!");
+      llvm::BasicBlock *IPBB = IP.getBlock();
+      llvm::BasicBlock *DestBB = IPBB->splitBasicBlock(IP.getPoint());
+      IPBB->getTerminator()->eraseFromParent();
+      Builder.SetInsertPoint(IPBB);
+      CodeGenFunction::JumpDest Dest = getJumpDestInCurrentScope(DestBB);
+      EmitBranchThroughCleanup(Dest);
+    };
+
+    // Privatization callback that performs appropriate action for
+    // shared/private/firstprivate/lastprivate/copyin/... variables.
+    //
+    // TODO: This defaults to shared right now.
+    auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+                     llvm::Value &Val, llvm::Value *&ReplVal) {
+      // The next line is appropriate only for variables (Val) with the
+      // data-sharing attribute "shared".
+      ReplVal = &Val;
+
+      return CodeGenIP;
+    };
+
+    const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
+    const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt();
+
+    auto BodyGenCB = [ParallelRegionBodyStmt,
+                      this](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+                            llvm::BasicBlock &ContinuationBB) {
+      auto OldAllocaIP = AllocaInsertPt;
+      AllocaInsertPt = &*AllocaIP.getPoint();
+
+      auto OldReturnBlock = ReturnBlock;
+      ReturnBlock = getJumpDestInCurrentScope(&ContinuationBB);
+
+      llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
+      CodeGenIPBB->splitBasicBlock(CodeGenIP.getPoint());
+      llvm::Instruction *CodeGenIPBBTI = CodeGenIPBB->getTerminator();
+      CodeGenIPBBTI->removeFromParent();
+
+      Builder.SetInsertPoint(CodeGenIPBB);
+
+      EmitStmt(ParallelRegionBodyStmt);
+
+      Builder.Insert(CodeGenIPBBTI);
+
+      AllocaInsertPt = OldAllocaIP;
+      ReturnBlock = OldReturnBlock;
+    };
+
+    CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
+    CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
+    Builder.restoreIP(OMPBuilder->CreateParallel(Builder, BodyGenCB, PrivCB,
+                                                 FiniCB, IfCond, NumThreads,
+                                                 ProcBind, S.hasCancel()));
+    return;
+  }
+
   // Emit parallel region as a standalone region.
   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
     Action.Enter(CGF);
@@ -4747,6 +4829,19 @@
       break;
     }
   }
+  if (llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder()) {
+    // TODO: This check is necessary as we only generate `omp parallel` through
+    // the OpenMPIRBuilder for now.
+    if (S.getCancelRegion() == OMPD_parallel) {
+      llvm::Value *IfCondition = nullptr;
+      if (IfCond)
+        IfCondition = EmitScalarExpr(IfCond,
+                                     /*IgnoreResultAssign=*/true);
+      return Builder.restoreIP(
+          OMPBuilder->CreateCancel(Builder, IfCondition, S.getCancelRegion()));
+    }
+  }
+
   CGM.getOpenMPRuntime().emitCancelCall(*this, S.getBeginLoc(), IfCond,
                                         S.getCancelRegion());
 }
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to