jdoerfert updated this revision to Diff 228331.
jdoerfert marked 2 inline comments as done.
jdoerfert added a comment.
Herald added a project: LLVM.
Adjust tests as requested
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D69922/new/
https://reviews.llvm.org/D69922
Files:
clang/include/clang/Basic/LangOptions.def
clang/include/clang/Driver/Options.td
clang/lib/CodeGen/CGOpenMPRuntime.cpp
clang/lib/CodeGen/CodeGenModule.cpp
clang/lib/CodeGen/CodeGenModule.h
clang/lib/Driver/ToolChains/Clang.cpp
clang/lib/Frontend/CompilerInvocation.cpp
clang/test/Driver/fopenmp.c
clang/test/OpenMP/barrier_codegen.cpp
llvm/include/llvm/IR/OpenMPIRBuilder.h
Index: llvm/include/llvm/IR/OpenMPIRBuilder.h
===================================================================
--- llvm/include/llvm/IR/OpenMPIRBuilder.h
+++ llvm/include/llvm/IR/OpenMPIRBuilder.h
@@ -37,6 +37,9 @@
/// Description of a LLVM-IR insertion point (IP) and a debug/source location
/// (filename, line, column, ...).
struct LocationDescription {
+ template <typename T, typename U>
+ LocationDescription(const IRBuilder<T, U> &IRB)
+ : IP(IRB.saveIP()), DL(IRB.getCurrentDebugLocation()) {}
LocationDescription(const IRBuilder<>::InsertPoint &IP) : IP(IP) {}
LocationDescription(const IRBuilder<>::InsertPoint &IP, const DebugLoc &DL)
: IP(IP), DL(DL) {}
Index: clang/test/OpenMP/barrier_codegen.cpp
===================================================================
--- clang/test/OpenMP/barrier_codegen.cpp
+++ clang/test/OpenMP/barrier_codegen.cpp
@@ -1,6 +1,10 @@
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,CLANGCG
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CLANGCG
+
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -fopenmp-enable-irbuilder -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,IRBUILDER
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fopenmp-enable-irbuilder -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fopenmp-enable-irbuilder -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,IRBUILDER
// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -emit-pch -o %t %s
@@ -14,6 +18,10 @@
// CHECK-DAG: [[EXPLICIT_BARRIER_LOC:@.+]] = {{.+}} [[IDENT_T]] { i32 0, i32 34, i32 0, i32 0, i8* getelementptr inbounds ([{{[0-9]+}} x i8], [{{[0-9]+}} x i8]* @{{.+}}, i32 0, i32 0) }
// CHECK-DAG: [[LOC:@.+]] = {{.+}} [[IDENT_T]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([{{[0-9]+}} x i8], [{{[0-9]+}} x i8]* @{{.+}}, i32 0, i32 0) }
+// CLANGCG-NOT: readonly
+// IRBUILDER: ; Function Attrs: nofree nosync nounwind readonly
+// IRBUILDER-NEXT: declare i32 @__kmpc_global_thread_num(%struct.ident_t*)
+
void foo() {}
template <class T>
Index: clang/test/Driver/fopenmp.c
===================================================================
--- clang/test/Driver/fopenmp.c
+++ clang/test/Driver/fopenmp.c
@@ -124,6 +124,12 @@
// CHECK-LD-STATIC-IOMP5-NO-BDYNAMIC: "-{{B?}}static" {{.*}} "-liomp5"
// CHECK-LD-STATIC-IOMP5-NO-BDYNAMIC-NOT: "-Bdynamic"
//
+// RUN: %clang -target x86_64-linux-gnu -fopenmp -fopenmp-enable-irbuilder -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-CC1-OPENMPIRBUILDER
+//
+// CHECK-CC1-OPENMPIRBUILDER: "-cc1"
+// CHECK-CC1-OPENMPIRBUILDER-SAME: "-fopenmp"
+// CHECK-CC1-OPENMPIRBUILDER-SAME: "-fopenmp-enable-irbuilder"
+//
// We'd like to check that the default is sane, but until we have the ability
// to *always* semantically analyze OpenMP without always generating runtime
// calls (in the event of an unsupported runtime), we don't have a good way to
Index: clang/lib/Frontend/CompilerInvocation.cpp
===================================================================
--- clang/lib/Frontend/CompilerInvocation.cpp
+++ clang/lib/Frontend/CompilerInvocation.cpp
@@ -2996,6 +2996,8 @@
Opts.OpenMP && !Args.hasArg(options::OPT_fnoopenmp_use_tls);
Opts.OpenMPIsDevice =
Opts.OpenMP && Args.hasArg(options::OPT_fopenmp_is_device);
+ Opts.OpenMPIRBuilder =
+ Opts.OpenMP && Args.hasArg(options::OPT_fopenmp_enable_irbuilder);
bool IsTargetSpecified =
Opts.OpenMPIsDevice || Args.hasArg(options::OPT_fopenmp_targets_EQ);
Index: clang/lib/Driver/ToolChains/Clang.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -4555,6 +4555,7 @@
CmdArgs.push_back("-fnoopenmp-use-tls");
Args.AddLastArg(CmdArgs, options::OPT_fopenmp_simd,
options::OPT_fno_openmp_simd);
+ Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_enable_irbuilder);
Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_version_EQ);
Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_cuda_number_of_sm_EQ);
Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_cuda_blocks_per_sm_EQ);
Index: clang/lib/CodeGen/CodeGenModule.h
===================================================================
--- clang/lib/CodeGen/CodeGenModule.h
+++ clang/lib/CodeGen/CodeGenModule.h
@@ -45,6 +45,7 @@
class DataLayout;
class FunctionType;
class LLVMContext;
+class OpenMPIRBuilder;
class IndexedInstrProfReader;
}
@@ -319,6 +320,7 @@
std::unique_ptr<CGObjCRuntime> ObjCRuntime;
std::unique_ptr<CGOpenCLRuntime> OpenCLRuntime;
std::unique_ptr<CGOpenMPRuntime> OpenMPRuntime;
+ std::unique_ptr<llvm::OpenMPIRBuilder> OMPBuilder;
std::unique_ptr<CGCUDARuntime> CUDARuntime;
std::unique_ptr<CGDebugInfo> DebugInfo;
std::unique_ptr<ObjCEntrypoints> ObjCData;
@@ -585,6 +587,9 @@
return *OpenMPRuntime;
}
+ /// Return a pointer to the configured OpenMPIRBuilder, if any.
+ llvm::OpenMPIRBuilder *getOpenMPIRBuilder() { return OMPBuilder.get(); }
+
/// Return a reference to the configured CUDA runtime.
CGCUDARuntime &getCUDARuntime() {
assert(CUDARuntime != nullptr);
Index: clang/lib/CodeGen/CodeGenModule.cpp
===================================================================
--- clang/lib/CodeGen/CodeGenModule.cpp
+++ clang/lib/CodeGen/CodeGenModule.cpp
@@ -52,6 +52,7 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/OpenMPIRBuilder.h"
#include "llvm/IR/ProfileSummary.h"
#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/Support/CodeGen.h"
@@ -216,6 +217,14 @@
OpenMPRuntime.reset(new CGOpenMPRuntime(*this));
break;
}
+
+ // The OpenMP-IR-Builder should eventually replace the above runtime codegens
+ // but we are not there yet so they both reside in CGModule for now and the
+ // OpenMP-IR-Builder is opt-in only.
+ if (LangOpts.OpenMPIRBuilder) {
+ OMPBuilder.reset(new llvm::OpenMPIRBuilder(TheModule));
+ OMPBuilder->initialize();
+ }
}
void CodeGenModule::createCUDARuntime() {
Index: clang/lib/CodeGen/CGOpenMPRuntime.cpp
===================================================================
--- clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -23,6 +23,7 @@
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/OpenMPIRBuilder.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
@@ -3477,6 +3478,18 @@
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
OpenMPDirectiveKind Kind, bool EmitChecks,
bool ForceSimpleCall) {
+ // Check if we can use the OMPBuilder
+ // TODO: Implement missing functionality and remove this check and directly
+ // invoke the builder without going through the CGOpenMPRuntime.
+ auto *OMPRegionInfo =
+ dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
+ llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
+ if (OMPBuilder && (!EmitChecks || ForceSimpleCall || !OMPRegionInfo ||
+ !OMPRegionInfo->hasCancel())) {
+ OMPBuilder->CreateBarrier(CGF.Builder, Kind, ForceSimpleCall, EmitChecks);
+ return;
+ }
+
if (!CGF.HaveInsertPoint())
return;
// Build call __kmpc_cancel_barrier(loc, thread_id);
@@ -3486,8 +3499,7 @@
// thread_id);
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
getThreadID(CGF, Loc)};
- if (auto *OMPRegionInfo =
- dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
+ if (OMPRegionInfo) {
if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
llvm::Value *Result = CGF.EmitRuntimeCall(
createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
Index: clang/include/clang/Driver/Options.td
===================================================================
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -1640,6 +1640,8 @@
Group<f_Group>, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
def fopenmp_simd : Flag<["-"], "fopenmp-simd">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused]>,
HelpText<"Emit OpenMP code only for SIMD-based constructs.">;
+def fopenmp_enable_irbuilder : Flag<["-"], "fopenmp-enable-irbuilder">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>,
+ HelpText<"Use the experimental OpenMP-IR-Builder codegen path.">;
def fno_openmp_simd : Flag<["-"], "fno-openmp-simd">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused]>;
def fopenmp_cuda_mode : Flag<["-"], "fopenmp-cuda-mode">, Group<f_Group>,
Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
Index: clang/include/clang/Basic/LangOptions.def
===================================================================
--- clang/include/clang/Basic/LangOptions.def
+++ clang/include/clang/Basic/LangOptions.def
@@ -212,6 +212,7 @@
LANGOPT(OpenMPUseTLS , 1, 0, "Use TLS for threadprivates or runtime calls")
LANGOPT(OpenMPIsDevice , 1, 0, "Generate code only for OpenMP target device")
LANGOPT(OpenMPCUDAMode , 1, 0, "Generate code for OpenMP pragmas in SIMT/SPMD mode")
+LANGOPT(OpenMPIRBuilder , 1, 0, "Use the experimental OpenMP-IR-Builder codegen path.")
LANGOPT(OpenMPCUDAForceFullRuntime , 1, 0, "Force to use full runtime in all constructs when offloading to CUDA devices")
LANGOPT(OpenMPCUDANumSMs , 32, 0, "Number of SMs for CUDA devices.")
LANGOPT(OpenMPCUDABlocksPerSM , 32, 0, "Number of blocks per SM for CUDA devices.")
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits