[PATCH] D80917: [OpenMP][AMDGCN] Support OpenMP offloading for AMDGCN architecture - Part 2

Saiyedul Islam via Phabricator via cfe-commits Mon, 01 Jun 2020 04:16:02 -0700

saiislam created this revision.
saiislam added reviewers: arsenm, sameerds, jdoerfert, yaxunl, b-sumner, 
scchan, JonChesterfield.
Herald added subscribers: cfe-commits, sstefan1, kerbowa, guansong, nhaehnle, 
wdng, jvesely, jholewinski.
Herald added a project: clang.
saiislam added a parent revision: D79754: [OpenMP][AMDGCN] Support OpenMP 
offloading for AMDGCN architecture - Part 1.


New file include to support platform dependent grid constants. It will be
used by clang, libomptarget plugins, and deviceRTLs to access constant
values consistently and with fast access in the deviceRTLs.

Originally authored by Greg Rodgers (@gregrodgers).


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D80917

Files:
  clang/include/clang/Basic/OpenMPGridValues.h
  clang/include/clang/Basic/TargetInfo.h
  clang/lib/Basic/Targets/AMDGPU.cpp
  clang/lib/Basic/Targets/NVPTX.cpp

Index: clang/lib/Basic/Targets/NVPTX.cpp
===================================================================
--- clang/lib/Basic/Targets/NVPTX.cpp
+++ clang/lib/Basic/Targets/NVPTX.cpp
@@ -14,6 +14,7 @@
 #include "Targets.h"
 #include "clang/Basic/Builtins.h"
 #include "clang/Basic/MacroBuilder.h"
+#include "clang/Basic/OpenMPGridValues.h"
 #include "clang/Basic/TargetBuiltins.h"
 #include "llvm/ADT/StringSwitch.h"
 
@@ -62,6 +63,8 @@
   TLSSupported = false;
   VLASupported = false;
   AddrSpaceMap = &NVPTXAddrSpaceMap;
+  GridValues = (const int *)&(GPU::NVPTXGpuGridValues[0]);
+  LongGridValues = (const long long *)&(GPU::NVPTXGpuLongGridValues[0]);
   UseAddrSpaceMapMangling = true;
 
   // Define available target features
Index: clang/lib/Basic/Targets/AMDGPU.cpp
===================================================================
--- clang/lib/Basic/Targets/AMDGPU.cpp
+++ clang/lib/Basic/Targets/AMDGPU.cpp
@@ -15,6 +15,7 @@
 #include "clang/Basic/CodeGenOptions.h"
 #include "clang/Basic/LangOptions.h"
 #include "clang/Basic/MacroBuilder.h"
+#include "clang/Basic/OpenMPGridValues.h"
 #include "clang/Basic/TargetBuiltins.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/IR/DataLayout.h"
@@ -286,6 +287,8 @@
   resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
                                         : DataLayoutStringR600);
   assert(DataLayout->getAllocaAddrSpace() == Private);
+  GridValues = (const int *)&(GPU::AMDGPUGpuGridValues[0]);
+  LongGridValues = (const long long *)&(GPU::AMDGPUGpuLongGridValues[0]);
 
   setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
                      !isAMDGCN(Triple));
Index: clang/include/clang/Basic/TargetInfo.h
===================================================================
--- clang/include/clang/Basic/TargetInfo.h
+++ clang/include/clang/Basic/TargetInfo.h
@@ -15,8 +15,10 @@
 #define LLVM_CLANG_BASIC_TARGETINFO_H
 
 #include "clang/Basic/AddressSpaces.h"
+#include "clang/Basic/CodeGenOptions.h"
 #include "clang/Basic/LLVM.h"
 #include "clang/Basic/LangOptions.h"
+#include "clang/Basic/OpenMPGridValues.h"
 #include "clang/Basic/Specifiers.h"
 #include "clang/Basic/TargetCXXABI.h"
 #include "clang/Basic/TargetOptions.h"
@@ -196,6 +198,8 @@
   unsigned char RegParmMax, SSERegParmMax;
   TargetCXXABI TheCXXABI;
   const LangASMap *AddrSpaceMap;
+  const int *GridValues;
+  const long long int *LongGridValues;
 
   mutable StringRef PlatformName;
   mutable VersionTuple PlatformMinVersion;
@@ -1306,6 +1310,12 @@
     return LangAS::Default;
   }
 
+  int getGridValue(GPU::GVIDX gv) const { return GridValues[gv]; }
+
+  long long getLongGridValue(GPU::GVLIDX gv) const {
+    return LongGridValues[gv];
+  }
+
   /// Retrieve the name of the platform as it is used in the
   /// availability attribute.
   StringRef getPlatformName() const { return PlatformName; }
Index: clang/include/clang/Basic/OpenMPGridValues.h
===================================================================
--- /dev/null
+++ clang/include/clang/Basic/OpenMPGridValues.h
@@ -0,0 +1,134 @@
+//===--- OpenMPGridValues.h - Language-specific address spaces --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Provides definitions for Target specific Grid Values
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_BASIC_OPENMPGRIDVALUES_H
+#define LLVM_CLANG_BASIC_OPENMPGRIDVALUES_H
+
+namespace clang {
+
+namespace GPU {
+
+/// \brief Defines various target-specific Gpu grid values that must be
+///        consistent between host RTL (plugin), device RTL, and clang.
+///        By adding this to TargetInfo in clang, we can change grid values
+///        for a "fat" binary so that different passes get the correct values
+///        when generating code for a multi-target binary. Both amdgcn
+///        and nvptx values are stored in this file. In the future, should
+///        there be differences between GPUs of the same architecture,
+///        then simply make a different array and use the new array name.
+///
+/// Example usage in clang:
+///   const unsigned slot_size = ctx.GetTargetInfo().getGridValue(GV_Warp_Size);
+///
+/// Example usage in libomptarget/deviceRTLs:
+///   #include "OpenMPGridValues.h"
+///   #ifdef __AMDGPU__
+///     #define GRIDVAL AMDGPUGpuGridValues
+///   #else
+///     #define GRIDVAL NVPTXGpuGridValues
+///   #endif
+///   ... Then use this reference for GV_Warp_Size in the deviceRTL source.
+///   GRIDVAL[GV_Warp_Size]
+///
+/// Example usage in libomptarget hsa plugin:
+///   #include "OpenMPGridValues.h"
+///   #define GRIDVAL AMDGPUGpuGridValues
+///   ... Then use this reference to access GV_Warp_Size in the hsa plugin.
+///   GRIDVAL[GV_Warp_Size]
+///
+/// Example usage in libomptarget cuda plugin:
+///    #include "OpenMPGridValues.h"
+///    #define GRIDVAL NVPTXGpuGridValues
+///   ... Then use this reference to access GV_Warp_Size in the cuda plugin.
+///    GRIDVAL[GV_Warp_Size]
+///
+enum GVIDX {
+  /// The maximum number of workers in a kernel.
+  /// (THREAD_ABSOLUTE_LIMIT) - (GV_Warp_Size), might be issue for blockDim.z
+  GV_Threads,
+  /// The size reserved for data in a shared memory slot.
+  GV_Slot_Size,
+  /// The maximum number of threads in a worker warp.
+  GV_Warp_Size,
+  /// The number of bits required to represent the max number of threads in warp
+  GV_Warp_Size_Log2,
+  /// GV_Warp_Size * GV_Slot_Size,
+  GV_Warp_Slot_Size,
+  /// the maximum number of teams.
+  GV_Max_Teams,
+  /// Global Memory Alignment
+  GV_Mem_Align,
+  /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
+  GV_Warp_Size_Log2_Mask,
+  // An alternative to the heavy data sharing infrastructure that uses global
+  // memory is one that uses device __shared__ memory.  The amount of such space
+  // (in bytes) reserved by the OpenMP runtime is noted here.
+  GV_SimpleBufferSize,
+  // The absolute maximum team size for a working group
+  GV_Max_WG_Size,
+  // The default maximum team size for a working group
+  GV_Default_WG_Size,
+  // This is GV_Max_WG_Size / GV_WarpSize. 32 for Nvidia and 16 for AMD.
+  GV_Max_Warp_Number
+};
+
+enum GVLIDX {
+  /// The slot size that should be reserved for a working warp.
+  /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
+  GV_Warp_Size_Log2_MaskL
+};
+
+/// For AMDGPU GPUs
+static constexpr int AMDGPUGpuGridValues[] = {
+    448,      // GV_Threads   FIXME:  How can we make this bigger?
+    256,      // GV_Slot_Size
+    64,       // GV_Warp_Size
+    6,        // GV_Warp_Size_Log2
+    64 * 256, // GV_Warp_Slot_Size
+    128,      // GV_Max_Teams
+    256,      // GV_Mem_Align
+    63,       // GV_Warp_Size_Log2_Mask
+    896,      // GV_SimpleBufferSize
+    1024,     // GV_Max_WG_Size,
+    256,      // GV_Defaut_WG_Size
+    1024 / 64 // This is GV_Max_WG_Size / GV_WarpSize
+
+};
+static constexpr long long AMDGPUGpuLongGridValues[] = {
+    63 // GV_Warp_Size_Log2_MaskL
+};
+
+/// For Nvidia GPUs
+static constexpr int NVPTXGpuGridValues[] = {
+    992,               // GV_Threads
+    256,               // GV_Slot_Size
+    32,                // GV_Warp_Size
+    5,                 // GV_Warp_Size_Log2
+    32 * 256,          // GV_Warp_Slot_Size
+    1024,              // GV_Max_Teams
+    256,               // GV_Mem_Align
+    (~0u >> (32 - 5)), // GV_Warp_Size_Log2_Mask
+    896,               // GV_SimpleBufferSize
+    1024,              // GV_Max_WG_Size
+    128,               // GV_Defaut_WG_Size
+    1024 / 32          // GV_Max_WG_Size / GV_WarpSize
+};
+
+static constexpr long long NVPTXGpuLongGridValues[] = {
+    31 // GV_Warp_Size_Log2_MaskL
+};
+
+} // namespace GPU
+} // namespace clang
+#endif

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D80917: [OpenMP][AMDGCN] Support OpenMP offloading for AMDGCN architecture - Part 2

Reply via email to