JonChesterfield created this revision. JonChesterfield added reviewers: jdoerfert, grokos, ABataev, ronlieb, tianshilei1992. Herald added subscribers: t-tye, tpr, dstuttard, yaxunl, jvesely, kzhuravl. JonChesterfield requested review of this revision. Herald added subscribers: openmp-commits, cfe-commits, sstefan1, wdng. Herald added projects: clang, OpenMP.
[libomptarget][amdgpu] Call into deviceRTL instead of ockl. AMDGPU codegen presently emits a call into ockl. The same functionality is already present in the deviceRTL. This patch adds an AMDGPU-specific entry point to avoid the dependency. This lets simple OpenMP code (specifically, code that doesn't use libm) run without the ROCm device libraries installed. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D93356 Files: clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp openmp/libomptarget/deviceRTLs/amdgcn/src/amdgcn_interface.h openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip Index: openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip =================================================================== --- openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip +++ openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip @@ -144,6 +144,11 @@ return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u)); } +EXTERN uint32_t __kmpc_amdgcn_gpu_num_threads(void) +{ + return GetNumberOfThreadsInBlock(); +} + // Stub implementations -DEVICE void *__kmpc_impl_malloc(size_t ) { return nullptr } +DEVICE void *__kmpc_impl_malloc(size_t ) { return nullptr; } DEVICE void __kmpc_impl_free(void *) {} Index: openmp/libomptarget/deviceRTLs/amdgcn/src/amdgcn_interface.h =================================================================== --- openmp/libomptarget/deviceRTLs/amdgcn/src/amdgcn_interface.h +++ openmp/libomptarget/deviceRTLs/amdgcn/src/amdgcn_interface.h @@ -15,4 +15,6 @@ typedef uint64_t __kmpc_impl_lanemask_t; typedef uint32_t omp_lock_t; /* arbitrary type of the right length */ +EXTERN uint32_t __kmpc_amdgcn_gpu_num_threads(void); + #endif Index: clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp =================================================================== --- clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp +++ clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp @@ -49,13 +49,12 @@ llvm::Value *CGOpenMPRuntimeAMDGCN::getGPUNumThreads(CodeGenFunction &CGF) { 
CGBuilderTy &Bld = CGF.Builder; llvm::Module *M = &CGF.CGM.getModule(); - const char *LocSize = "__ockl_get_local_size"; + const char *LocSize = "__kmpc_amdgcn_gpu_num_threads"; llvm::Function *F = M->getFunction(LocSize); if (!F) { F = llvm::Function::Create( - llvm::FunctionType::get(CGF.Int64Ty, {CGF.Int32Ty}, false), + llvm::FunctionType::get(CGF.Int32Ty, llvm::None, false), llvm::GlobalVariable::ExternalLinkage, LocSize, &CGF.CGM.getModule()); } - return Bld.CreateTrunc( - Bld.CreateCall(F, {Bld.getInt32(0)}, "nvptx_num_threads"), CGF.Int32Ty); + return Bld.CreateCall(F, llvm::None, "nvptx_num_threads"); }
Index: openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip =================================================================== --- openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip +++ openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip @@ -144,6 +144,11 @@ return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u)); } +EXTERN uint32_t __kmpc_amdgcn_gpu_num_threads(void) +{ + return GetNumberOfThreadsInBlock(); +} + // Stub implementations -DEVICE void *__kmpc_impl_malloc(size_t ) { return nullptr } +DEVICE void *__kmpc_impl_malloc(size_t ) { return nullptr; } DEVICE void __kmpc_impl_free(void *) {} Index: openmp/libomptarget/deviceRTLs/amdgcn/src/amdgcn_interface.h =================================================================== --- openmp/libomptarget/deviceRTLs/amdgcn/src/amdgcn_interface.h +++ openmp/libomptarget/deviceRTLs/amdgcn/src/amdgcn_interface.h @@ -15,4 +15,6 @@ typedef uint64_t __kmpc_impl_lanemask_t; typedef uint32_t omp_lock_t; /* arbitrary type of the right length */ +EXTERN uint32_t __kmpc_amdgcn_gpu_num_threads(void); + #endif Index: clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp =================================================================== --- clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp +++ clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp @@ -49,13 +49,12 @@ llvm::Value *CGOpenMPRuntimeAMDGCN::getGPUNumThreads(CodeGenFunction &CGF) { CGBuilderTy &Bld = CGF.Builder; llvm::Module *M = &CGF.CGM.getModule(); - const char *LocSize = "__ockl_get_local_size"; + const char *LocSize = "__kmpc_amdgcn_gpu_num_threads"; llvm::Function *F = M->getFunction(LocSize); if (!F) { F = llvm::Function::Create( - llvm::FunctionType::get(CGF.Int64Ty, {CGF.Int32Ty}, false), + llvm::FunctionType::get(CGF.Int32Ty, llvm::None, false), llvm::GlobalVariable::ExternalLinkage, LocSize, &CGF.CGM.getModule()); } - return Bld.CreateTrunc( - Bld.CreateCall(F, {Bld.getInt32(0)}, "nvptx_num_threads"), CGF.Int32Ty); + return Bld.CreateCall(F, 
llvm::None, "nvptx_num_threads"); }
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits