https://github.com/Artem-B updated https://github.com/llvm/llvm-project/pull/102661
>From 0f3944e1c12baa958f52c3c015a0cf5f9aeff1ed Mon Sep 17 00:00:00 2001 From: Artem Belevich <t...@google.com> Date: Fri, 9 Aug 2024 11:51:23 -0700 Subject: [PATCH 1/2] [CUDA] Emit used function list in deterministic order. Fixes https://github.com/llvm/llvm-project/issues/101560 --- clang/lib/CodeGen/CodeGenModule.cpp | 3 +++ .../host-used-extern-determinism.cu | 21 +++++++++++++++++++ 2 files changed, 24 insertions(+) create mode 100644 clang/test/CodeGenCUDA/host-used-extern-determinism.cu diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 9aaf90ccfe04f..aefedeffab614 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -950,6 +950,9 @@ void CodeGenModule::Release() { UsedArray.push_back(llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast( GetAddrOfGlobal(GD), Int8PtrTy)); } + // Sort decls by name to always emit them in deterministic order. + llvm::sort(UsedArray, + [](auto A, auto B) { return A->getName() < B->getName(); }); llvm::ArrayType *ATy = llvm::ArrayType::get(Int8PtrTy, UsedArray.size()); diff --git a/clang/test/CodeGenCUDA/host-used-extern-determinism.cu b/clang/test/CodeGenCUDA/host-used-extern-determinism.cu new file mode 100644 index 0000000000000..0c4e966f5de6f --- /dev/null +++ b/clang/test/CodeGenCUDA/host-used-extern-determinism.cu @@ -0,0 +1,21 @@ +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -x hip %s \ +// RUN: -fgpu-rdc -std=c++11 -emit-llvm -o - -target-cpu gfx906 | FileCheck %s + +#include "Inputs/cuda.h" + +// CHECK-LABEL: @__clang_gpu_used_externalx = internal {{.*}}global +// References to the kernels must be in sorted order. +// CHECK-SAME: [ptr @_Z6kernelILi0EEvPi, ptr @_Z6kernelILi1EEvPi, ptr @_Z6kernelILi2EEvPi, ptr @_Z6kernelILi3EEvPi] + +template <int N> +__global__ void kernel(int* out) { *out = N; } + +void host(int n) { + void * k; + switch (n) { + case 3: k = (void*)&kernel<3>; break; + case 1: k = (void*)&kernel<1>; break; + case 2: k = (void*)&kernel<2>; break; + case 0: k = (void*)&kernel<0>; break; + } +} >From d94579dde791e6c91ba915f15cb03bab6409107a Mon Sep 17 00:00:00 2001 From: Artem Belevich <t...@google.com> Date: Fri, 9 Aug 2024 15:12:18 -0700 Subject: [PATCH 2/2] Switched to SetVector to maintain deterministic order. --- clang/include/clang/AST/ASTContext.h | 5 +++-- clang/lib/CodeGen/CodeGenModule.cpp | 3 --- clang/test/CodeGenCUDA/host-used-extern-determinism.cu | 6 +++--- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h index 94cf9113cd43a..58a820508da42 100644 --- a/clang/include/clang/AST/ASTContext.h +++ b/clang/include/clang/AST/ASTContext.h @@ -34,6 +34,7 @@ #include "llvm/ADT/MapVector.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/PointerUnion.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" @@ -1194,8 +1195,8 @@ class ASTContext : public RefCountedBase<ASTContext> { llvm::DenseSet<const VarDecl *> CUDADeviceVarODRUsedByHost; /// Keep track of CUDA/HIP external kernels or device variables ODR-used by - /// host code. - llvm::DenseSet<const ValueDecl *> CUDAExternalDeviceDeclODRUsedByHost; + /// host code. SetVector is used to maintain the order. + llvm::SetVector<const ValueDecl *> CUDAExternalDeviceDeclODRUsedByHost; /// Keep track of CUDA/HIP implicit host device functions used on device side /// in device compilation. diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index aefedeffab614..9aaf90ccfe04f 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -950,9 +950,6 @@ void CodeGenModule::Release() { UsedArray.push_back(llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast( GetAddrOfGlobal(GD), Int8PtrTy)); } - // Sort decls by name to always emit them in deterministic order. - llvm::sort(UsedArray, - [](auto A, auto B) { return A->getName() < B->getName(); }); llvm::ArrayType *ATy = llvm::ArrayType::get(Int8PtrTy, UsedArray.size()); diff --git a/clang/test/CodeGenCUDA/host-used-extern-determinism.cu b/clang/test/CodeGenCUDA/host-used-extern-determinism.cu index 0c4e966f5de6f..1e52887b894b1 100644 --- a/clang/test/CodeGenCUDA/host-used-extern-determinism.cu +++ b/clang/test/CodeGenCUDA/host-used-extern-determinism.cu @@ -3,9 +3,9 @@ #include "Inputs/cuda.h" -// CHECK-LABEL: @__clang_gpu_used_externalx = internal {{.*}}global -// References to the kernels must be in sorted order. -// CHECK-SAME: [ptr @_Z6kernelILi0EEvPi, ptr @_Z6kernelILi1EEvPi, ptr @_Z6kernelILi2EEvPi, ptr @_Z6kernelILi3EEvPi] +// CHECK-LABEL: @__clang_gpu_used_external = internal {{.*}}global +// References to the kernels must be in order of appearance. +// CHECK-SAME: [ptr @_Z6kernelILi3EEvPi, ptr @_Z6kernelILi1EEvPi, ptr @_Z6kernelILi2EEvPi, ptr @_Z6kernelILi0EEvPi] template <int N> __global__ void kernel(int* out) { *out = N; } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits