Author: Jon Chesterfield Date: 2021-08-20T18:17:27+01:00 New Revision: b1efeface70c26f2f2e30636943c02f356ce4faa
URL: https://github.com/llvm/llvm-project/commit/b1efeface70c26f2f2e30636943c02f356ce4faa DIFF: https://github.com/llvm/llvm-project/commit/b1efeface70c26f2f2e30636943c02f356ce4faa.diff LOG: Revert "[openmp][nfc] Refactor GridValues" Failed a nvptx codegen test This reverts commit 2a47a84b40115b01e03e4d89c1d47ba74beb7bf3. Added: Modified: clang/include/clang/Basic/TargetInfo.h clang/lib/Basic/Targets/AMDGPU.cpp clang/lib/Basic/Targets/AMDGPU.h clang/lib/Basic/Targets/NVPTX.cpp clang/lib/Basic/Targets/NVPTX.h clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h Removed: ################################################################################ diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h index fe6f67d40b53..ab855948b447 100644 --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -210,6 +210,9 @@ class TargetInfo : public virtual TransferrableTargetInfo, unsigned char RegParmMax, SSERegParmMax; TargetCXXABI TheCXXABI; const LangASMap *AddrSpaceMap; + const llvm::omp::GV *GridValues = + nullptr; // target-specific GPU grid values that must be + // consistent between host RTL (plugin), device RTL, and clang. mutable StringRef PlatformName; mutable VersionTuple PlatformMinVersion; @@ -1407,10 +1410,10 @@ class TargetInfo : public virtual TransferrableTargetInfo, return LangAS::Default; } - // access target-specific GPU grid values that must be consistent between - // host RTL (plugin), deviceRTL and clang. - virtual const llvm::omp::GV &getGridValue() const { - llvm_unreachable("getGridValue not implemented on this target"); + /// Return a target-specific GPU grid values + const llvm::omp::GV &getGridValue() const { + assert(GridValues != nullptr && "GridValues not initialized"); + return *GridValues; } /// Retrieve the name of the platform as it is used in the diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index ba7ffa34c73e..cebb19e7ccab 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -17,6 +17,7 @@ #include "clang/Basic/MacroBuilder.h" #include "clang/Basic/TargetBuiltins.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/Frontend/OpenMP/OMPGridValues.h" using namespace clang; using namespace clang::targets; @@ -334,6 +335,7 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, llvm::AMDGPU::getArchAttrR600(GPUKind)) { resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN : DataLayoutStringR600); + GridValues = &llvm::omp::AMDGPUGridValues; setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D || !isAMDGCN(Triple)); diff --git a/clang/lib/Basic/Targets/AMDGPU.h b/clang/lib/Basic/Targets/AMDGPU.h index e791a83f38ae..77c2c5fd5014 100644 --- a/clang/lib/Basic/Targets/AMDGPU.h +++ b/clang/lib/Basic/Targets/AMDGPU.h @@ -370,10 +370,6 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : public TargetInfo { return getLangASFromTargetAS(Constant); } - const llvm::omp::GV &getGridValue() const override { - return llvm::omp::AMDGPUGridValues; - } - /// \returns Target specific vtbl ptr address space. unsigned getVtblPtrAddressSpace() const override { return static_cast<unsigned>(Constant); diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp index c245753c93f4..d1a34e4a81c5 100644 --- a/clang/lib/Basic/Targets/NVPTX.cpp +++ b/clang/lib/Basic/Targets/NVPTX.cpp @@ -16,6 +16,7 @@ #include "clang/Basic/MacroBuilder.h" #include "clang/Basic/TargetBuiltins.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/Frontend/OpenMP/OMPGridValues.h" using namespace clang; using namespace clang::targets; @@ -64,6 +65,7 @@ NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple, TLSSupported = false; VLASupported = false; AddrSpaceMap = &NVPTXAddrSpaceMap; + GridValues = &llvm::omp::NVPTXGridValues; UseAddrSpaceMapMangling = true; // Define available target features diff --git a/clang/lib/Basic/Targets/NVPTX.h b/clang/lib/Basic/Targets/NVPTX.h index ef751b8e1a8d..c7db3cdaaf10 100644 --- a/clang/lib/Basic/Targets/NVPTX.h +++ b/clang/lib/Basic/Targets/NVPTX.h @@ -147,10 +147,6 @@ class LLVM_LIBRARY_VISIBILITY NVPTXTargetInfo : public TargetInfo { Opts["cl_khr_local_int32_extended_atomics"] = true; } - const llvm::omp::GV &getGridValue() const override { - return llvm::omp::NVPTXGridValues; - } - /// \returns If a target requires an address within a target specific address /// space \p AddressSpace to be converted in order to be used, then return the /// corresponding target specific DWARF address space. diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index 0e392c263471..b13d55994ef6 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -22,7 +22,6 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Frontend/OpenMP/OMPGridValues.h" #include "llvm/IR/IntrinsicsNVPTX.h" -#include "llvm/Support/MathExtras.h" using namespace clang; using namespace CodeGen; @@ -107,7 +106,8 @@ class ExecutionRuntimeModesRAII { /// is the same for all known NVPTX architectures. enum MachineConfiguration : unsigned { /// See "llvm/Frontend/OpenMP/OMPGridValues.h" for various related target - /// specific Grid Values like GV_Warp_Size, GV_Slot_Size + /// specific Grid Values like GV_Warp_Size, GV_Warp_Size_Log2, + /// and GV_Warp_Size_Log2_Mask. /// Global memory alignment for performance. GlobalMemoryAlignment = 128, @@ -535,8 +535,7 @@ class CheckVarsEscapingDeclContext final /// on the NVPTX device, to generate more efficient code. static llvm::Value *getNVPTXWarpID(CodeGenFunction &CGF) { CGBuilderTy &Bld = CGF.Builder; - unsigned LaneIDBits = - llvm::Log2_32(CGF.getTarget().getGridValue().GV_Warp_Size); + unsigned LaneIDBits = CGF.getTarget().getGridValue().GV_Warp_Size_Log2; auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime()); return Bld.CreateAShr(RT.getGPUThreadID(CGF), LaneIDBits, "nvptx_warp_id"); } @@ -546,9 +545,8 @@ static llvm::Value *getNVPTXWarpID(CodeGenFunction &CGF) { /// on the NVPTX device, to generate more efficient code. static llvm::Value *getNVPTXLaneID(CodeGenFunction &CGF) { CGBuilderTy &Bld = CGF.Builder; - unsigned LaneIDBits = - llvm::Log2_32(CGF.getTarget().getGridValue().GV_Warp_Size); - unsigned LaneIDMask = ~0 >> (32u - LaneIDBits); + unsigned LaneIDMask = + CGF.getContext().getTargetInfo().getGridValue().GV_Warp_Size_Log2_Mask; auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime()); return Bld.CreateAnd(RT.getGPUThreadID(CGF), Bld.getInt32(LaneIDMask), "nvptx_lane_id"); diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h b/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h index 2130b9bf91cd..1d7735ebf72d 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h @@ -62,13 +62,19 @@ struct GV { const unsigned GV_Slot_Size; /// The default value of maximum number of threads in a worker warp. const unsigned GV_Warp_Size; - - constexpr unsigned warpSlotSize() const { - return GV_Warp_Size * GV_Slot_Size; - } - + /// Alternate warp size for some AMDGCN architectures. Same as GV_Warp_Size + /// for NVPTX. + const unsigned GV_Warp_Size_32; + /// The number of bits required to represent the max number of threads in warp + const unsigned GV_Warp_Size_Log2; + /// GV_Warp_Size * GV_Slot_Size, + const unsigned GV_Warp_Slot_Size; /// the maximum number of teams. const unsigned GV_Max_Teams; + /// Global Memory Alignment + const unsigned GV_Mem_Align; + /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2)) + const unsigned GV_Warp_Size_Log2_Mask; // An alternative to the heavy data sharing infrastructure that uses global // memory is one that uses device __shared__ memory. The amount of such space // (in bytes) reserved by the OpenMP runtime is noted here. @@ -77,32 +83,47 @@ struct GV { const unsigned GV_Max_WG_Size; // The default maximum team size for a working group const unsigned GV_Default_WG_Size; - - constexpr unsigned maxWarpNumber() const { - return GV_Max_WG_Size / GV_Warp_Size; - } + // This is GV_Max_WG_Size / GV_WarpSize. 32 for NVPTX and 16 for AMDGCN. + const unsigned GV_Max_Warp_Number; + /// The slot size that should be reserved for a working warp. + /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2)) + const unsigned GV_Warp_Size_Log2_MaskL; }; /// For AMDGPU GPUs static constexpr GV AMDGPUGridValues = { - 448, // GV_Threads - 256, // GV_Slot_Size - 64, // GV_Warp_Size - 128, // GV_Max_Teams - 896, // GV_SimpleBufferSize - 1024, // GV_Max_WG_Size, - 256, // GV_Default_WG_Size + 448, // GV_Threads + 256, // GV_Slot_Size + 64, // GV_Warp_Size + 32, // GV_Warp_Size_32 + 6, // GV_Warp_Size_Log2 + 64 * 256, // GV_Warp_Slot_Size + 128, // GV_Max_Teams + 256, // GV_Mem_Align + 63, // GV_Warp_Size_Log2_Mask + 896, // GV_SimpleBufferSize + 1024, // GV_Max_WG_Size, + 256, // GV_Defaut_WG_Size + 1024 / 64, // GV_Max_WG_Size / GV_WarpSize + 63 // GV_Warp_Size_Log2_MaskL }; /// For Nvidia GPUs static constexpr GV NVPTXGridValues = { - 992, // GV_Threads - 256, // GV_Slot_Size - 32, // GV_Warp_Size - 1024, // GV_Max_Teams - 896, // GV_SimpleBufferSize - 1024, // GV_Max_WG_Size - 128, // GV_Default_WG_Size + 992, // GV_Threads + 256, // GV_Slot_Size + 32, // GV_Warp_Size + 32, // GV_Warp_Size_32 + 5, // GV_Warp_Size_Log2 + 32 * 256, // GV_Warp_Slot_Size + 1024, // GV_Max_Teams + 256, // GV_Mem_Align + (~0u >> (32 - 5)), // GV_Warp_Size_Log2_Mask + 896, // GV_SimpleBufferSize + 1024, // GV_Max_WG_Size + 128, // GV_Defaut_WG_Size + 1024 / 32, // GV_Max_WG_Size / GV_WarpSize + 31 // GV_Warp_Size_Log2_MaskL }; } // namespace omp _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits