Author: Yaxun (Sam) Liu Date: 2020-10-15T17:25:55-04:00 New Revision: e384e94fbe7c1d5c89fcdde33ffda04e9802c2ce
URL: https://github.com/llvm/llvm-project/commit/e384e94fbe7c1d5c89fcdde33ffda04e9802c2ce DIFF: https://github.com/llvm/llvm-project/commit/e384e94fbe7c1d5c89fcdde33ffda04e9802c2ce.diff LOG: Revert "[HIP] Change default --gpu-max-threads-per-block value to 1024" This reverts commit 187658b8a6112446d9e7797d495bc7542ac83905 due to AMDGPU backend issues. Added: Modified: clang/include/clang/Basic/LangOptions.def clang/lib/CodeGen/TargetInfo.cpp clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu clang/test/CodeGenCUDA/kernel-amdgcn.cu Removed: ################################################################################ diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index 3132e7635418..9846809763f8 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -240,7 +240,7 @@ LANGOPT(CUDAHostDeviceConstexpr, 1, 1, "treating unattributed constexpr function LANGOPT(CUDADeviceApproxTranscendentals, 1, 0, "using approximate transcendental functions") LANGOPT(GPURelocatableDeviceCode, 1, 0, "generate relocatable device code") LANGOPT(GPUAllowDeviceInit, 1, 0, "allowing device side global init functions for HIP") -LANGOPT(GPUMaxThreadsPerBlock, 32, 1024, "default max threads per block for kernel launch bounds for HIP") +LANGOPT(GPUMaxThreadsPerBlock, 32, 256, "default max threads per block for kernel launch bounds for HIP") LANGOPT(SYCL , 1, 0, "SYCL") LANGOPT(SYCLIsDevice , 1, 0, "Generate code for SYCL device") diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index 31fbe393e8d2..83b97775b96c 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -9059,13 +9059,9 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes( assert(Max == 0 && "Max must be zero"); } else if (IsOpenCLKernel || IsHIPKernel) { // By default, restrict the maximum size to a value specified by - // --gpu-max-threads-per-block=n or its default value for HIP. - const unsigned OpenCLDefaultMaxWorkGroupSize = 256; - const unsigned DefaultMaxWorkGroupSize = - IsOpenCLKernel ? OpenCLDefaultMaxWorkGroupSize - : M.getLangOpts().GPUMaxThreadsPerBlock; + // --gpu-max-threads-per-block=n or its default value. std::string AttrVal = - std::string("1,") + llvm::utostr(DefaultMaxWorkGroupSize); + std::string("1,") + llvm::utostr(M.getLangOpts().GPUMaxThreadsPerBlock); F->addFnAttr("amdgpu-flat-work-group-size", AttrVal); } diff --git a/clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu b/clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu index 7a9fd2527272..5415bddffc89 100644 --- a/clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu +++ b/clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu @@ -39,7 +39,7 @@ __global__ void num_vgpr_64() { // NAMD-NOT: "amdgpu-num-vgpr" // NAMD-NOT: "amdgpu-num-sgpr" -// DEFAULT-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024"{{.*}}"uniform-work-group-size"="true" +// DEFAULT-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,256"{{.*}}"uniform-work-group-size"="true" // MAX1024-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024" // CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = {{.*}}"amdgpu-flat-work-group-size"="32,64" // CHECK-DAG: attributes [[WAVES_PER_EU_2]] = {{.*}}"amdgpu-waves-per-eu"="2" diff --git a/clang/test/CodeGenCUDA/kernel-amdgcn.cu b/clang/test/CodeGenCUDA/kernel-amdgcn.cu index 6066469f7647..135d3030480c 100644 --- a/clang/test/CodeGenCUDA/kernel-amdgcn.cu +++ b/clang/test/CodeGenCUDA/kernel-amdgcn.cu @@ -39,4 +39,4 @@ int main() { launch((void*)D.Empty()); return 0; } -// CHECK: attributes #[[ATTR]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024" +// CHECK: attributes #[[ATTR]] = {{.*}}"amdgpu-flat-work-group-size"="1,256" _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits