arpith-jacob created this revision.
arpith-jacob added reviewers: ABataev, sfantao, carlo.bertolli, kkwli0, caomhin.
arpith-jacob added a subscriber: cfe-commits.
Herald added a subscriber: jholewinski.

This patch cleans up private methods for NVPTX OpenMP codegen.  It converts 
private members to static functions to follow the coding style of 
CGOpenMPRuntime.cpp and declutter the header file.


https://reviews.llvm.org/D28124

Files:
  lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
  lib/CodeGen/CGOpenMPRuntimeNVPTX.h

Index: lib/CodeGen/CGOpenMPRuntimeNVPTX.h
===================================================================
--- lib/CodeGen/CGOpenMPRuntimeNVPTX.h
+++ lib/CodeGen/CGOpenMPRuntimeNVPTX.h
@@ -50,38 +50,6 @@
 
 private:
   //
-  // NVPTX calls.
-  //
-
-  /// \brief Get the GPU warp size.
-  llvm::Value *getNVPTXWarpSize(CodeGenFunction &CGF);
-
-  /// \brief Get the id of the current thread on the GPU.
-  llvm::Value *getNVPTXThreadID(CodeGenFunction &CGF);
-
-  // \brief Get the maximum number of threads in a block of the GPU.
-  llvm::Value *getNVPTXNumThreads(CodeGenFunction &CGF);
-
-  /// \brief Get barrier to synchronize all threads in a block.
-  void getNVPTXCTABarrier(CodeGenFunction &CGF);
-
-  // \brief Synchronize all GPU threads in a block.
-  void syncCTAThreads(CodeGenFunction &CGF);
-
-  //
-  // OMP calls.
-  //
-
-  /// \brief Get the thread id of the OMP master thread.
-  /// The master thread id is the first thread (lane) of the last warp in the
-  /// GPU block.  Warp size is assumed to be some power of 2.
-  /// Thread id is 0 indexed.
-  /// E.g: If NumThreads is 33, master id is 32.
-  ///      If NumThreads is 64, master id is 32.
-  ///      If NumThreads is 1024, master id is 992.
-  llvm::Value *getMasterThreadID(CodeGenFunction &CGF);
-
-  //
   // Private state and methods.
   //
 
Index: lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
===================================================================
--- lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -20,76 +20,74 @@
 using namespace clang;
 using namespace CodeGen;
 
-/// \brief Get the GPU warp size.
-llvm::Value *CGOpenMPRuntimeNVPTX::getNVPTXWarpSize(CodeGenFunction &CGF) {
+namespace {
+enum OpenMPRTLFunctionNVPTX {
+  /// \brief Call to void __kmpc_kernel_init(kmp_int32 omp_handle,
+  /// kmp_int32 thread_limit);
+  OMPRTL_NVPTX__kmpc_kernel_init,
+};
+
+// NVPTX Address space
+enum ADDRESS_SPACE {
+  ADDRESS_SPACE_SHARED = 3,
+};
+} // namespace
+
+/// Get the GPU warp size.
+static llvm::Value *GetNVPTXWarpSize(CodeGenFunction &CGF) {
   CGBuilderTy &Bld = CGF.Builder;
   return Bld.CreateCall(
       llvm::Intrinsic::getDeclaration(
-          &CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_warpsize),
+          &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_warpsize),
       llvm::None, "nvptx_warp_size");
 }
 
-/// \brief Get the id of the current thread on the GPU.
-llvm::Value *CGOpenMPRuntimeNVPTX::getNVPTXThreadID(CodeGenFunction &CGF) {
+/// Get the id of the current thread on the GPU.
+static llvm::Value *GetNVPTXThreadID(CodeGenFunction &CGF) {
   CGBuilderTy &Bld = CGF.Builder;
   return Bld.CreateCall(
       llvm::Intrinsic::getDeclaration(
-          &CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x),
+          &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x),
       llvm::None, "nvptx_tid");
 }
 
-// \brief Get the maximum number of threads in a block of the GPU.
-llvm::Value *CGOpenMPRuntimeNVPTX::getNVPTXNumThreads(CodeGenFunction &CGF) {
+/// Get the maximum number of threads in a block of the GPU.
+static llvm::Value *GetNVPTXNumThreads(CodeGenFunction &CGF) {
   CGBuilderTy &Bld = CGF.Builder;
   return Bld.CreateCall(
       llvm::Intrinsic::getDeclaration(
-          &CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_ntid_x),
+          &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_ntid_x),
       llvm::None, "nvptx_num_threads");
 }
 
-/// \brief Get barrier to synchronize all threads in a block.
-void CGOpenMPRuntimeNVPTX::getNVPTXCTABarrier(CodeGenFunction &CGF) {
+/// Get barrier to synchronize all threads in a block.
+static void GetNVPTXCTABarrier(CodeGenFunction &CGF) {
   CGBuilderTy &Bld = CGF.Builder;
   Bld.CreateCall(llvm::Intrinsic::getDeclaration(
-      &CGM.getModule(), llvm::Intrinsic::nvvm_barrier0));
+      &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_barrier0));
 }
 
-// \brief Synchronize all GPU threads in a block.
-void CGOpenMPRuntimeNVPTX::syncCTAThreads(CodeGenFunction &CGF) {
-  getNVPTXCTABarrier(CGF);
-}
+/// Synchronize all GPU threads in a block.
+static void SyncCTAThreads(CodeGenFunction &CGF) { GetNVPTXCTABarrier(CGF); }
 
-/// \brief Get the thread id of the OMP master thread.
+/// Get the thread id of the OMP master thread.
 /// The master thread id is the first thread (lane) of the last warp in the
 /// GPU block.  Warp size is assumed to be some power of 2.
 /// Thread id is 0 indexed.
 /// E.g: If NumThreads is 33, master id is 32.
 ///      If NumThreads is 64, master id is 32.
 ///      If NumThreads is 1024, master id is 992.
-llvm::Value *CGOpenMPRuntimeNVPTX::getMasterThreadID(CodeGenFunction &CGF) {
+static llvm::Value *GetMasterThreadID(CodeGenFunction &CGF) {
   CGBuilderTy &Bld = CGF.Builder;
-  llvm::Value *NumThreads = getNVPTXNumThreads(CGF);
+  llvm::Value *NumThreads = GetNVPTXNumThreads(CGF);
 
   // We assume that the warp size is a power of 2.
-  llvm::Value *Mask = Bld.CreateSub(getNVPTXWarpSize(CGF), Bld.getInt32(1));
+  llvm::Value *Mask = Bld.CreateSub(GetNVPTXWarpSize(CGF), Bld.getInt32(1));
 
   return Bld.CreateAnd(Bld.CreateSub(NumThreads, Bld.getInt32(1)),
                        Bld.CreateNot(Mask), "master_tid");
 }
 
-namespace {
-enum OpenMPRTLFunctionNVPTX {
-  /// \brief Call to void __kmpc_kernel_init(kmp_int32 omp_handle,
-  /// kmp_int32 thread_limit);
-  OMPRTL_NVPTX__kmpc_kernel_init,
-};
-
-// NVPTX Address space
-enum ADDRESS_SPACE {
-  ADDRESS_SPACE_SHARED = 3,
-};
-} // namespace
-
 CGOpenMPRuntimeNVPTX::WorkerFunctionState::WorkerFunctionState(
     CodeGenModule &CGM)
     : WorkerFn(nullptr), CGFI(nullptr) {
@@ -164,7 +162,7 @@
   // Workers wait for work from master.
   CGF.EmitBlock(AwaitBB);
   // Wait for parallel work
-  syncCTAThreads(CGF);
+  SyncCTAThreads(CGF);
   // On termination condition (workid == 0), exit loop.
   llvm::Value *ShouldTerminate = Bld.CreateICmpEQ(
       Bld.CreateAlignedLoad(WorkID, WorkID->getAlignment()),
@@ -174,7 +172,7 @@
 
   // Activate requested workers.
   CGF.EmitBlock(SelectWorkersBB);
-  llvm::Value *ThreadID = getNVPTXThreadID(CGF);
+  llvm::Value *ThreadID = GetNVPTXThreadID(CGF);
   llvm::Value *ActiveThread = Bld.CreateICmpSLT(
       ThreadID,
       Bld.CreateAlignedLoad(ActiveWorkers, ActiveWorkers->getAlignment()),
@@ -192,7 +190,7 @@
   // All active and inactive workers wait at a barrier after parallel region.
   CGF.EmitBlock(BarrierBB);
   // Barrier after parallel region.
-  syncCTAThreads(CGF);
+  SyncCTAThreads(CGF);
   CGF.EmitBranch(AwaitBB);
 
   // Exit target region.
@@ -206,9 +204,9 @@
   CGBuilderTy &Bld = CGF.Builder;
 
   // Get the master thread id.
-  llvm::Value *MasterID = getMasterThreadID(CGF);
+  llvm::Value *MasterID = GetMasterThreadID(CGF);
   // Current thread's identifier.
-  llvm::Value *ThreadID = getNVPTXThreadID(CGF);
+  llvm::Value *ThreadID = GetNVPTXThreadID(CGF);
 
   // Setup BBs in entry function.
   llvm::BasicBlock *WorkerCheckBB = CGF.createBasicBlock(".check.for.worker");
@@ -237,7 +235,7 @@
 
   // First action in sequential region:
   // Initialize the state of the OpenMP runtime library on the GPU.
-  llvm::Value *Args[] = {Bld.getInt32(/*OmpHandle=*/0), getNVPTXThreadID(CGF)};
+  llvm::Value *Args[] = {Bld.getInt32(/*OmpHandle=*/0), GetNVPTXThreadID(CGF)};
   CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_init),
                       Args);
 }
@@ -257,7 +255,7 @@
       llvm::Constant::getNullValue(WorkID->getType()->getElementType()), WorkID,
       WorkID->getAlignment());
   // Barrier to terminate worker threads.
-  syncCTAThreads(CGF);
+  SyncCTAThreads(CGF);
   // Master thread jumps to exit point.
   CGF.EmitBranch(EST.ExitBB);
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to