gtbercea created this revision. gtbercea added reviewers: ABataev, AlexEichenberger, caomhin. Herald added subscribers: cfe-commits, jdoerfert, jfb, guansong, jholewinski. Herald added a project: clang.
This patch adds support for registering requires directives with the runtime. Each clause of a requires directive sets a corresponding flag. The resulting set of flags is passed to the runtime to be checked for compatibility with the flags coming from other object files. The registration function is called whenever OpenMP is present, even if no requires directive is present. This helps detect cases in which requires directives are used inconsistently. Repository: rC Clang https://reviews.llvm.org/D60568 Files: lib/CodeGen/CGDecl.cpp lib/CodeGen/CGOpenMPRuntime.cpp lib/CodeGen/CGOpenMPRuntime.h lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp lib/CodeGen/CGOpenMPRuntimeNVPTX.h lib/CodeGen/CodeGenModule.cpp lib/CodeGen/CodeGenModule.h test/OpenMP/openmp_offload_registration.cpp
Index: test/OpenMP/openmp_offload_registration.cpp =================================================================== --- test/OpenMP/openmp_offload_registration.cpp +++ test/OpenMP/openmp_offload_registration.cpp @@ -26,7 +26,7 @@ // CHECK: [[DESC:@.+]] = internal constant [[DSCTY]] { i32 2, [[DEVTY]]* getelementptr inbounds ([2 x [[DEVTY]]], [2 x [[DEVTY]]]* [[IMAGES]], i32 0, i32 0), [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }, comdat($[[REGFN]]) // Check target registration is registered as a Ctor. -// CHECK: appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 0, void ()* @[[REGFN]], i8* bitcast (void ()* @[[REGFN]] to i8*) }] +// CHECK: appending global [2 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 0, void ()* @.omp_offloading.requires_reg, i8* null }, { i32, void ()*, i8* } { i32 0, void ()* @[[REGFN]], i8* bitcast (void ()* @[[REGFN]] to i8*) }] // Check presence of foo() and the outlined target region // CHECK: define void [[FOO:@.+]]() @@ -34,6 +34,11 @@ // Check registration and unregistration code. 
+// CHECK: define internal void @.omp_offloading.requires_reg() +// CHECK: call i32 @__tgt_register_requires(i64 0) +// CHECK: ret void +// CHECK: declare i32 @__tgt_register_requires(i64) + // CHECK: define internal void @[[UNREGFN:.+]](i8*) // CHECK-SAME: comdat($[[REGFN]]) { // CHECK: call i32 @__tgt_unregister_lib([[DSCTY]]* [[DESC]]) Index: lib/CodeGen/CodeGenModule.h =================================================================== --- lib/CodeGen/CodeGenModule.h +++ lib/CodeGen/CodeGenModule.h @@ -291,6 +291,8 @@ typedef std::vector<Structor> CtorList; + bool HasRequiresUnifiedSharedMemory = false; + private: ASTContext &Context; const LangOptions &LangOpts; Index: lib/CodeGen/CodeGenModule.cpp =================================================================== --- lib/CodeGen/CodeGenModule.cpp +++ lib/CodeGen/CodeGenModule.cpp @@ -410,6 +410,10 @@ AddGlobalCtor(CudaCtorFunction); } if (OpenMPRuntime) { + if (llvm::Function *OpenMPRequiresDirectiveRegFun = + OpenMPRuntime->emitRequiresDirectiveRegFun()) { + AddGlobalCtor(OpenMPRequiresDirectiveRegFun, 0, nullptr); + } if (llvm::Function *OpenMPRegistrationFunction = OpenMPRuntime->emitRegistrationFunction()) { auto ComdatKey = OpenMPRegistrationFunction->hasComdat() ? Index: lib/CodeGen/CGOpenMPRuntimeNVPTX.h =================================================================== --- lib/CodeGen/CGOpenMPRuntimeNVPTX.h +++ lib/CodeGen/CGOpenMPRuntimeNVPTX.h @@ -383,7 +383,8 @@ /// Perform check on requires decl to ensure that target architecture /// supports unified addressing - void checkArchForUnifiedAddressing(const OMPRequiresDecl *D) const override; + void checkArchForUnifiedAddressing(CodeGenModule &CGM, + const OMPRequiresDecl *D) const override; /// Returns default address space for the constant firstprivates, __constant__ /// address space by default. 
Index: lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp =================================================================== --- lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -4942,7 +4942,8 @@ /// Check to see if target architecture supports unified addressing which is /// a restriction for OpenMP requires clause "unified_shared_memory". void CGOpenMPRuntimeNVPTX::checkArchForUnifiedAddressing( - const OMPRequiresDecl *D) const { + CodeGenModule &CGM, const OMPRequiresDecl *D) const { + CGOpenMPRuntime::checkArchForUnifiedAddressing(CGM, D); for (const OMPClause *Clause : D->clauselists()) { if (Clause->getClauseKind() == OMPC_unified_shared_memory) { switch (getCudaArch(CGM)) { Index: lib/CodeGen/CGOpenMPRuntime.h =================================================================== --- lib/CodeGen/CGOpenMPRuntime.h +++ lib/CodeGen/CGOpenMPRuntime.h @@ -636,6 +636,9 @@ /// must be emitted. llvm::SmallDenseSet<const VarDecl *> DeferredGlobalVariables; + /// Creates and registers requires directives. + llvm::Function *createRequiresDirectiveRegistration(); + /// Creates and registers offloading binary descriptor for the current /// compilation unit. The function that does the registration is returned. llvm::Function *createOffloadingBinaryDescriptorRegistration(); @@ -1429,6 +1432,10 @@ /// \param GD Global to scan. virtual bool emitTargetGlobal(GlobalDecl GD); + /// Creates and returns a registration function for when at least one + /// requires directives was used in the current module. + virtual llvm::Function *emitRequiresDirectiveRegFun(); + /// Creates the offloading descriptor in the event any target region /// was emitted in the current module and return the function that registers /// it. 
@@ -1597,7 +1604,8 @@ /// Perform check on requires decl to ensure that target architecture /// supports unified addressing - virtual void checkArchForUnifiedAddressing(const OMPRequiresDecl *D) const {} + virtual void checkArchForUnifiedAddressing(CodeGenModule &CGM, + const OMPRequiresDecl *D) const; /// Checks if the variable has associated OMPAllocateDeclAttr attribute with /// the predefined allocator and translates it into the corresponding address Index: lib/CodeGen/CGOpenMPRuntime.cpp =================================================================== --- lib/CodeGen/CGOpenMPRuntime.cpp +++ lib/CodeGen/CGOpenMPRuntime.cpp @@ -694,6 +694,8 @@ // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit); OMPRTL__tgt_target_teams_nowait, + // Call to void __tgt_register_requires(int64_t flags); + OMPRTL__tgt_register_requires, // Call to void __tgt_register_lib(__tgt_bin_desc *desc); OMPRTL__tgt_register_lib, // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc); @@ -721,6 +723,25 @@ OMPRTL__tgt_target_data_update_nowait, }; +enum OpenMPOffloadingRequiresDirFlags : int64_t { + /// no requires directive present. + OMP_REQ_NONE = 0x000, + /// reverse_offload clause. + OMP_REQ_REVERSE_OFFLOAD = 0x001, + /// unified_address clause. + OMP_REQ_UNIFIED_ADDRESS = 0x002, + /// unified_shared_memory clause. + OMP_REQ_UNIFIED_SHARED_MEMORY = 0x004, + /// atomic_default_mem_order seq_cst clause. + OMP_REQ_ATOMIC_DEFAULT_SEQ_CST = 0x008, + /// atomic_default_mem_order acq_rel clause. + OMP_REQ_ATOMIC_DEFAULT_ACQ_REL = 0x010, + /// atomic_default_mem_order relaxed clause. + OMP_REQ_ATOMIC_DEFAULT_RELAXED = 0x020, + /// dynamic_allocators clause. + OMP_REQ_DYNAMIC_ALLOCATORS = 0x040 +}; + /// A basic class for pre|post-action for advanced codegen sequence for OpenMP /// region. 
class CleanupTy final : public EHScopeStack::Cleanup { @@ -2294,6 +2315,14 @@ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait"); break; } + case OMPRTL__tgt_register_requires: { + // Build void __tgt_register_requires(int64_t flags); + llvm::Type *TypeParams[] = {CGM.Int64Ty}; + auto *FnTy = + llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires"); + break; + } case OMPRTL__tgt_register_lib: { // Build void __tgt_register_lib(__tgt_bin_desc *desc); QualType ParamTy = @@ -3838,6 +3867,36 @@ } llvm::Function * +CGOpenMPRuntime::createRequiresDirectiveRegistration() { + // If we don't have entries or if we are emitting code for the device, we + // don't need to do anything. + if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty()) + return nullptr; + + ASTContext &C = CGM.getContext(); + + llvm::Function *RequiresRegFn; + { + CodeGenFunction CGF(CGM); + const auto &FI = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, {}); + llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); + std::string ReqName = getName({"omp_offloading", "requires_reg"}); + RequiresRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, ReqName, FI); + CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {}); + int64_t Flags = OMP_REQ_NONE; + //TODO: check for other requires clauses. + if (CGF.CGM.HasRequiresUnifiedSharedMemory) { + Flags |= OMP_REQ_UNIFIED_SHARED_MEMORY; + } + CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_requires), + llvm::ConstantInt::get(CGF.CGM.Int64Ty, Flags)); + CGF.FinishFunction(); + } + return RequiresRegFn; +} + +llvm::Function * CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { // If we don't have entries or if we are emitting code for the device, we // don't need to do anything. 
@@ -7921,6 +7980,10 @@ MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types) const { + // If using unified memory, no need to do the mappings. + if (CGF.CGM.HasRequiresUnifiedSharedMemory) + return; + // Map other list items in the map clause which are not captured variables // but "declare target link" global variables., for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) { @@ -8935,6 +8998,16 @@ " Expected target-based directive."); } +void CGOpenMPRuntime::checkArchForUnifiedAddressing( + CodeGenModule &CGM, const OMPRequiresDecl *D) const { + for (const OMPClause *Clause : D->clauselists()) { + if (Clause->getClauseKind() == OMPC_unified_shared_memory) { + CGM.HasRequiresUnifiedSharedMemory = true; + break; + } + } +} + bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD, LangAS &AS) { if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>()) @@ -8993,6 +9066,11 @@ return !AlreadyEmittedTargetFunctions.insert(Name).second; } +llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() { + // Create and register the function that handles the requires directives. + return createRequiresDirectiveRegistration(); +} + llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() { // If we have offloading in the current module, we need to emit the entries // now and register the offloading descriptor. Index: lib/CodeGen/CGDecl.cpp =================================================================== --- lib/CodeGen/CGDecl.cpp +++ lib/CodeGen/CGDecl.cpp @@ -2573,5 +2573,5 @@ } void CodeGenModule::EmitOMPRequiresDecl(const OMPRequiresDecl *D) { - getOpenMPRuntime().checkArchForUnifiedAddressing(D); + getOpenMPRuntime().checkArchForUnifiedAddressing(*this, D); }
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits