arpith-jacob created this revision. arpith-jacob added reviewers: ABataev, sfantao, carlo.bertolli, kkwli0, caomhin. arpith-jacob added a subscriber: cfe-commits. Herald added a subscriber: jholewinski.
This patch cleans up private methods for NVPTX OpenMP codegen. It converts private members to static functions to follow the coding style of CGOpenMPRuntime.cpp and declutter the header file. https://reviews.llvm.org/D28124 Files: lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp lib/CodeGen/CGOpenMPRuntimeNVPTX.h
Index: lib/CodeGen/CGOpenMPRuntimeNVPTX.h =================================================================== --- lib/CodeGen/CGOpenMPRuntimeNVPTX.h +++ lib/CodeGen/CGOpenMPRuntimeNVPTX.h @@ -50,38 +50,6 @@ private: // - // NVPTX calls. - // - - /// \brief Get the GPU warp size. - llvm::Value *getNVPTXWarpSize(CodeGenFunction &CGF); - - /// \brief Get the id of the current thread on the GPU. - llvm::Value *getNVPTXThreadID(CodeGenFunction &CGF); - - // \brief Get the maximum number of threads in a block of the GPU. - llvm::Value *getNVPTXNumThreads(CodeGenFunction &CGF); - - /// \brief Get barrier to synchronize all threads in a block. - void getNVPTXCTABarrier(CodeGenFunction &CGF); - - // \brief Synchronize all GPU threads in a block. - void syncCTAThreads(CodeGenFunction &CGF); - - // - // OMP calls. - // - - /// \brief Get the thread id of the OMP master thread. - /// The master thread id is the first thread (lane) of the last warp in the - /// GPU block. Warp size is assumed to be some power of 2. - /// Thread id is 0 indexed. - /// E.g: If NumThreads is 33, master id is 32. - /// If NumThreads is 64, master id is 32. - /// If NumThreads is 1024, master id is 992. - llvm::Value *getMasterThreadID(CodeGenFunction &CGF); - - // // Private state and methods. // Index: lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp =================================================================== --- lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -20,76 +20,74 @@ using namespace clang; using namespace CodeGen; -/// \brief Get the GPU warp size. -llvm::Value *CGOpenMPRuntimeNVPTX::getNVPTXWarpSize(CodeGenFunction &CGF) { +namespace { +enum OpenMPRTLFunctionNVPTX { + /// \brief Call to void __kmpc_kernel_init(kmp_int32 omp_handle, + /// kmp_int32 thread_limit); + OMPRTL_NVPTX__kmpc_kernel_init, +}; + +// NVPTX Address space +enum ADDRESS_SPACE { + ADDRESS_SPACE_SHARED = 3, +}; +} // namespace + +/// Get the GPU warp size. +static llvm::Value *GetNVPTXWarpSize(CodeGenFunction &CGF) { CGBuilderTy &Bld = CGF.Builder; return Bld.CreateCall( llvm::Intrinsic::getDeclaration( - &CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_warpsize), + &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_warpsize), llvm::None, "nvptx_warp_size"); } -/// \brief Get the id of the current thread on the GPU. -llvm::Value *CGOpenMPRuntimeNVPTX::getNVPTXThreadID(CodeGenFunction &CGF) { +/// Get the id of the current thread on the GPU. +static llvm::Value *GetNVPTXThreadID(CodeGenFunction &CGF) { CGBuilderTy &Bld = CGF.Builder; return Bld.CreateCall( llvm::Intrinsic::getDeclaration( - &CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x), + &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x), llvm::None, "nvptx_tid"); } -// \brief Get the maximum number of threads in a block of the GPU. -llvm::Value *CGOpenMPRuntimeNVPTX::getNVPTXNumThreads(CodeGenFunction &CGF) { +/// Get the maximum number of threads in a block of the GPU. +static llvm::Value *GetNVPTXNumThreads(CodeGenFunction &CGF) { CGBuilderTy &Bld = CGF.Builder; return Bld.CreateCall( llvm::Intrinsic::getDeclaration( - &CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_ntid_x), + &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_ntid_x), llvm::None, "nvptx_num_threads"); } -/// \brief Get barrier to synchronize all threads in a block. -void CGOpenMPRuntimeNVPTX::getNVPTXCTABarrier(CodeGenFunction &CGF) { +/// Get barrier to synchronize all threads in a block. +static void GetNVPTXCTABarrier(CodeGenFunction &CGF) { CGBuilderTy &Bld = CGF.Builder; Bld.CreateCall(llvm::Intrinsic::getDeclaration( - &CGM.getModule(), llvm::Intrinsic::nvvm_barrier0)); + &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_barrier0)); } -// \brief Synchronize all GPU threads in a block. -void CGOpenMPRuntimeNVPTX::syncCTAThreads(CodeGenFunction &CGF) { - getNVPTXCTABarrier(CGF); -} +/// Synchronize all GPU threads in a block. +static void SyncCTAThreads(CodeGenFunction &CGF) { GetNVPTXCTABarrier(CGF); } -/// \brief Get the thread id of the OMP master thread. +/// Get the thread id of the OMP master thread. /// The master thread id is the first thread (lane) of the last warp in the /// GPU block. Warp size is assumed to be some power of 2. /// Thread id is 0 indexed. /// E.g: If NumThreads is 33, master id is 32. /// If NumThreads is 64, master id is 32. /// If NumThreads is 1024, master id is 992. -llvm::Value *CGOpenMPRuntimeNVPTX::getMasterThreadID(CodeGenFunction &CGF) { +static llvm::Value *GetMasterThreadID(CodeGenFunction &CGF) { CGBuilderTy &Bld = CGF.Builder; - llvm::Value *NumThreads = getNVPTXNumThreads(CGF); + llvm::Value *NumThreads = GetNVPTXNumThreads(CGF); // We assume that the warp size is a power of 2. - llvm::Value *Mask = Bld.CreateSub(getNVPTXWarpSize(CGF), Bld.getInt32(1)); + llvm::Value *Mask = Bld.CreateSub(GetNVPTXWarpSize(CGF), Bld.getInt32(1)); return Bld.CreateAnd(Bld.CreateSub(NumThreads, Bld.getInt32(1)), Bld.CreateNot(Mask), "master_tid"); } -namespace { -enum OpenMPRTLFunctionNVPTX { - /// \brief Call to void __kmpc_kernel_init(kmp_int32 omp_handle, - /// kmp_int32 thread_limit); - OMPRTL_NVPTX__kmpc_kernel_init, -}; - -// NVPTX Address space -enum ADDRESS_SPACE { - ADDRESS_SPACE_SHARED = 3, -}; -} // namespace - CGOpenMPRuntimeNVPTX::WorkerFunctionState::WorkerFunctionState( CodeGenModule &CGM) : WorkerFn(nullptr), CGFI(nullptr) { @@ -164,7 +162,7 @@ // Workers wait for work from master. CGF.EmitBlock(AwaitBB); // Wait for parallel work - syncCTAThreads(CGF); + SyncCTAThreads(CGF); // On termination condition (workid == 0), exit loop. llvm::Value *ShouldTerminate = Bld.CreateICmpEQ( Bld.CreateAlignedLoad(WorkID, WorkID->getAlignment()), @@ -174,7 +172,7 @@ // Activate requested workers. CGF.EmitBlock(SelectWorkersBB); - llvm::Value *ThreadID = getNVPTXThreadID(CGF); + llvm::Value *ThreadID = GetNVPTXThreadID(CGF); llvm::Value *ActiveThread = Bld.CreateICmpSLT( ThreadID, Bld.CreateAlignedLoad(ActiveWorkers, ActiveWorkers->getAlignment()), @@ -192,7 +190,7 @@ // All active and inactive workers wait at a barrier after parallel region. CGF.EmitBlock(BarrierBB); // Barrier after parallel region. - syncCTAThreads(CGF); + SyncCTAThreads(CGF); CGF.EmitBranch(AwaitBB); // Exit target region. @@ -206,9 +204,9 @@ CGBuilderTy &Bld = CGF.Builder; // Get the master thread id. - llvm::Value *MasterID = getMasterThreadID(CGF); + llvm::Value *MasterID = GetMasterThreadID(CGF); // Current thread's identifier. - llvm::Value *ThreadID = getNVPTXThreadID(CGF); + llvm::Value *ThreadID = GetNVPTXThreadID(CGF); // Setup BBs in entry function. llvm::BasicBlock *WorkerCheckBB = CGF.createBasicBlock(".check.for.worker"); @@ -237,7 +235,7 @@ // First action in sequential region: // Initialize the state of the OpenMP runtime library on the GPU. - llvm::Value *Args[] = {Bld.getInt32(/*OmpHandle=*/0), getNVPTXThreadID(CGF)}; + llvm::Value *Args[] = {Bld.getInt32(/*OmpHandle=*/0), GetNVPTXThreadID(CGF)}; CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_init), Args); } @@ -257,7 +255,7 @@ llvm::Constant::getNullValue(WorkID->getType()->getElementType()), WorkID, WorkID->getAlignment()); // Barrier to terminate worker threads. - syncCTAThreads(CGF); + SyncCTAThreads(CGF); // Master thread jumps to exit point. CGF.EmitBranch(EST.ExitBB);
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits