ABataev added inline comments. ================ Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:3790 @@ -3923,3 +3789,3 @@ // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the - // mangled name of the function that encloses the target region and BB is the + // mangled name of the function that encloses the target region, BB is the // line number of the target region. ---------------- Revert back
================ Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:3796-3808 @@ -3929,13 +3795,15 @@ unsigned Line; + SmallString<256> OutlinedFnName; getTargetEntryUniqueInfo(CGM.getContext(), D.getLocStart(), DeviceID, FileID, Line); - SmallString<64> EntryFnName; { - llvm::raw_svector_ostream OS(EntryFnName); + llvm::raw_svector_ostream OS(OutlinedFnName); OS << "__omp_offloading" << llvm::format("_%x", DeviceID) << llvm::format("_%x_", FileID) << ParentName << "_l" << Line; } + const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); + CodeGenFunction CGF(CGM, true); - CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); + CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, OutlinedFnName); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); ---------------- What is the purpose of these massive changes? ================ Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:3918-3919 @@ -3917,4 +3786,1 @@ - - // Create a unique name for the entry function using the source location - // information of the current target region. The name will be something like: // ---------------- Restore it back, please. ================ Comment at: lib/CodeGen/CGOpenMPRuntime.h:50-54 @@ -49,2 +49,7 @@ class CGOpenMPRuntime { +protected: CodeGenModule &CGM; + + enum OpenMPRTLFunction { + /// \brief Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, + /// kmpc_micro microtask, ...); ---------------- No way!!! Revert it back, please. No need to expose all these stuff in header file ================ Comment at: lib/CodeGen/CGOpenMPRuntime.h:383 @@ -253,2 +382,3 @@ +protected: /// \brief Creates offloading entry for the provided entry ID \a ID, ---------------- Please, gather all 'protected' members in a single 'protected' section ================ Comment at: lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp:30 @@ +29,3 @@ +void CGOpenMPRuntimeNVPTX::createOffloadEntry(llvm::Constant *ID, + llvm::Constant *Addr, + uint64_t Size) { ---------------- If you expect 'llvm::Function*' here, why you can't change it to 'llvm::Function*'? ================ Comment at: lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp:47 @@ +46,3 @@ + MD->addOperand(llvm::MDNode::get(Ctx, MDVals)); + return; +} ---------------- Remove, not required ================ Comment at: lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp:133 @@ +132,3 @@ + + CodeGenFunction CGF(CGM, true); + CGF.StartFunction(GlobalDecl(), Ctx.VoidTy, WST.WorkerFn, *WST.CGFI, {}); ---------------- Comment for 'true' value ================ Comment at: lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp:242 @@ +241,3 @@ + +void CGOpenMPRuntimeNVPTX::emitEntryFooter(CodeGenFunction &CGF, + EntryFunctionState &EST) { ---------------- What about exceptions? Do you plan to support them? If yes, add tests for classes with constructors/destructors and exceptions ================ Comment at: lib/CodeGen/CGOpenMPRuntimeNVPTX.h:30 @@ +29,3 @@ + +private: + /// \brief Creates offloading entry for the provided entry ID \a ID, ---------------- Please, gather all private members in single 'private' section, protected to single 'protected' and public to single 'public' ================ Comment at: lib/CodeGen/CGOpenMPRuntimeNVPTX.h:36-41 @@ +35,8 @@ + + enum OpenMPRTLFunctionNVPTX { + /// \brief Call to void __kmpc_kernel_init(kmp_int32 omp_handle, + /// kmp_int32 thread_limit); + OMPRTL_NVPTX__kmpc_kernel_init = OMPRTL_last + 1, + }; + + /// \brief Returns specified OpenMP runtime function for the current OpenMP ---------------- No way!!! This must go to .cpp file! ================ Comment at: lib/CodeGen/CGOpenMPRuntimeNVPTX.h:68-102 @@ +67,37 @@ + + /// \brief Get the GPU warp size. + llvm::Value *GetNVPTXWarpSize(CodeGenFunction &CGF) { + CGBuilderTy &Bld = CGF.Builder; + return Bld.CreateCall( + llvm::Intrinsic::getDeclaration( + &CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_warpsize), + {}, "nvptx_warp_size"); + } + + /// \brief Get the id of the current thread on the GPU. + llvm::Value *GetNVPTXThreadID(CodeGenFunction &CGF) { + CGBuilderTy &Bld = CGF.Builder; + return Bld.CreateCall( + llvm::Intrinsic::getDeclaration( + &CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x), + {}, "nvptx_tid"); + } + + // \brief Get the maximum number of threads in a block of the GPU. + llvm::Value *GetNVPTXNumThreads(CodeGenFunction &CGF) { + CGBuilderTy &Bld = CGF.Builder; + return Bld.CreateCall( + llvm::Intrinsic::getDeclaration( + &CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_ntid_x), + {}, "nvptx_num_threads"); + } + + /// \brief Get barrier to synchronize all threads in a block. + void GetNVPTXCTABarrier(CodeGenFunction &CGF) { + CGBuilderTy &Bld = CGF.Builder; + Bld.CreateCall(llvm::Intrinsic::getDeclaration( + &CGM.getModule(), llvm::Intrinsic::nvvm_barrier0), + {}); + } + + // \brief Synchronize all GPU threads in a block. ---------------- Move all implementations to .cpp file ================ Comment at: lib/CodeGen/CGOpenMPRuntimeNVPTX.h:117-126 @@ +116,12 @@ + /// If NumThreads is 1024, master id is 992. + llvm::Value *GetMasterThreadID(CodeGenFunction &CGF) { + CGBuilderTy &Bld = CGF.Builder; + llvm::Value *NumThreads = GetNVPTXNumThreads(CGF); + + // We assume that the warp size is a power of 2. + llvm::Value *Mask = Bld.CreateSub(GetNVPTXWarpSize(CGF), Bld.getInt32(1)); + + return Bld.CreateAnd(Bld.CreateSub(NumThreads, Bld.getInt32(1)), + Bld.CreateNot(Mask), "master_tid"); + } + ---------------- Implementation must be in .cpp ================ Comment at: lib/CodeGen/CGOpenMPRuntimeNVPTX.h:129-131 @@ +128,5 @@ +private: + // NVPTX Address space + enum ADDRESS_SPACE { GLOBAL_ADDRESS_SPACE = 1, SHARED_ADDRESS_SPACE = 3 }; + + // Master-worker control state. ---------------- Again, move it to .cpp ================ Comment at: lib/CodeGen/CGOpenMPRuntimeNVPTX.h:138-172 @@ +137,37 @@ + + class EntryFunctionState { + public: + llvm::BasicBlock *ExitBB; + + EntryFunctionState() : ExitBB(nullptr){}; + }; + + class WorkerFunctionState { + public: + llvm::Function *WorkerFn; + const CGFunctionInfo *CGFI; + + WorkerFunctionState(CodeGenModule &CGM) : WorkerFn(nullptr), CGFI(nullptr) { + createWorkerFunction(CGM); + }; + + private: + void createWorkerFunction(CodeGenModule &CGM) { + auto &Ctx = CGM.getContext(); + + // Create an worker function with no arguments. + FunctionType::ExtInfo EI; + CGFI = &CGM.getTypes().arrangeFreeFunctionDeclaration( + Ctx.VoidTy, {}, EI, /*isVariadic=*/false); + + WorkerFn = + llvm::Function::Create(CGM.getTypes().GetFunctionType(*CGFI), + llvm::GlobalValue::InternalLinkage, + /* placeholder */ "_worker", &CGM.getModule()); + CGM.SetInternalFunctionAttributes(/*D=*/nullptr, WorkerFn, *CGFI); + WorkerFn->setLinkage(llvm::GlobalValue::InternalLinkage); + WorkerFn->addFnAttr(llvm::Attribute::NoInline); + } + }; + + /// \brief Initialize master-worker control state. ---------------- You can leave just declaration here, as this class is used by reference always. Definition must be moved to .cpp http://reviews.llvm.org/D17877 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits