llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang Author: Joseph Huber (jhuber6) <details> <summary>Changes</summary> Summary: This new attribute was introduced recently. We already do this for NVPTX kernels so we should apply this for AMDGPU as well. This patch simply applies this metadata in cases where a lower bound is known --- Full diff: https://github.com/llvm/llvm-project/pull/87695.diff 2 Files Affected: - (added) clang/test/OpenMP/thread_limit_amdgpu.c (+34) - (modified) llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp (+3) ``````````diff diff --git a/clang/test/OpenMP/thread_limit_amdgpu.c b/clang/test/OpenMP/thread_limit_amdgpu.c new file mode 100644 index 00000000000000..f884eeb73c3ff1 --- /dev/null +++ b/clang/test/OpenMP/thread_limit_amdgpu.c @@ -0,0 +1,34 @@ +// Test target codegen - host bc file has to be created first. +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-linux-gnu -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s +// expected-no-diagnostics + +#ifndef HEADER +#define HEADER + +void foo(int N) { +#pragma omp target teams distribute parallel for simd + for (int i = 0; i < N; ++i) + ; +#pragma omp target teams distribute parallel for simd thread_limit(4) + for (int i = 0; i < N; ++i) + ; +#pragma omp target teams distribute parallel for simd ompx_attribute(__attribute__((launch_bounds(42, 42)))) + for (int i = 0; i < N; ++i) + ; +#pragma omp target teams distribute parallel for simd ompx_attribute(__attribute__((launch_bounds(42, 42)))) num_threads(22) + for (int i = 0; i < N; ++i) + ; +} + +#endif + +// CHECK: define weak_odr protected amdgpu_kernel void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+__Z3fooi_}}l10({{.*}}) #[[ATTR1:.+]] { +// CHECK: define weak_odr protected amdgpu_kernel void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+__Z3fooi_}}l13({{.*}}) #[[ATTR2:.+]] { +// CHECK: define weak_odr protected amdgpu_kernel void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+__Z3fooi_}}l16({{.*}}) #[[ATTR3:.+]] { +// CHECK: define weak_odr protected amdgpu_kernel void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+__Z3fooi_}}l19({{.*}}) #[[ATTR4:.+]] { + +// CHECK: attributes #[[ATTR1]] = { {{.*}} "amdgpu-flat-work-group-size"="1,256" {{.*}} } +// CHECK: attributes #[[ATTR2]] = { {{.*}} "amdgpu-flat-work-group-size"="1,4" {{.*}} } +// CHECK: attributes #[[ATTR3]] = { {{.*}} "amdgpu-flat-work-group-size"="1,42" "amdgpu-max-num-workgroups"="42,1,1"{{.*}} } +// CHECK: attributes #[[ATTR4]] = { {{.*}} "amdgpu-flat-work-group-size"="1,22" "amdgpu-max-num-workgroups"="42,1,1"{{.*}} } diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 16507a69ea8502..4fe44b10d1bd0e 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -4791,6 +4791,9 @@ void OpenMPIRBuilder::writeTeamsForKernel(const Triple &T, Function &Kernel, updateNVPTXMetadata(Kernel, "maxclusterrank", UB, true); updateNVPTXMetadata(Kernel, "minctasm", LB, false); } + if (T.isAMDGPU()) { + Kernel.addFnAttr("amdgpu-max-num-workgroups", llvm::utostr(LB) + ",1,1"); + } Kernel.addFnAttr("omp_target_num_teams", std::to_string(LB)); } `````````` </details> https://github.com/llvm/llvm-project/pull/87695 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits