yaxunl created this revision. yaxunl added reviewers: tra, rjmccall. yaxunl edited the summary of this revision.
Currently clang fails to compile the following CUDA program in device compilation: __host__ int foo(int x) { return 1; } template<class T> __device__ __host__ int foo(T x) { return 2; } __device__ __host__ int bar() { return foo(1); } __global__ void test(int *a) { *a = bar(); } This is due to foo is resolved to the `__host__ foo` instead of `__device__ __host__ foo`. This seems to be a bug since `__device__ __host__ foo` is a viable callee for foo whereas clang is unable to choose it. nvcc has similar issue https://cuda.godbolt.org/z/bGijLc Although it only emits a warning and does not fail to compile. It emits a trap in the code so that it will fail at run time. This patch fixes that. https://reviews.llvm.org/D77954 Files: clang/lib/Sema/SemaOverload.cpp clang/test/SemaCUDA/function-overload.cu Index: clang/test/SemaCUDA/function-overload.cu =================================================================== --- clang/test/SemaCUDA/function-overload.cu +++ clang/test/SemaCUDA/function-overload.cu @@ -331,9 +331,6 @@ // If we have a mix of HD and H-only or D-only candidates in the overload set, // normal C++ overload resolution rules apply first. template <typename T> TemplateReturnTy template_vs_hd_function(T arg) -#ifdef __CUDA_ARCH__ -//expected-note@-2 {{declared here}} -#endif { return TemplateReturnTy(); } @@ -342,11 +339,13 @@ } __host__ __device__ void test_host_device_calls_hd_template() { - HostDeviceReturnTy ret1 = template_vs_hd_function(1.0f); - TemplateReturnTy ret2 = template_vs_hd_function(1); #ifdef __CUDA_ARCH__ - // expected-error@-2 {{reference to __host__ function 'template_vs_hd_function<int>' in __host__ __device__ function}} + typedef HostDeviceReturnTy ExpectedReturnTy; +#else + typedef TemplateReturnTy ExpectedReturnTy; #endif + HostDeviceReturnTy ret1 = template_vs_hd_function(1.0f); + ExpectedReturnTy ret2 = template_vs_hd_function(1); } __host__ void test_host_calls_hd_template() { Index: clang/lib/Sema/SemaOverload.cpp =================================================================== --- clang/lib/Sema/SemaOverload.cpp +++ clang/lib/Sema/SemaOverload.cpp @@ -9821,8 +9821,10 @@ llvm::any_of(Candidates, [&](OverloadCandidate *Cand) { // Check viable function only. return Cand->Viable && Cand->Function && - S.IdentifyCUDAPreference(Caller, Cand->Function) == - Sema::CFP_SameSide; + (S.IdentifyCUDAPreference(Caller, Cand->Function) == + Sema::CFP_SameSide || + S.IdentifyCUDAPreference(Caller, Cand->Function) == + Sema::CFP_HostDevice); }); if (ContainsSameSideCandidate) { auto IsWrongSideCandidate = [&](OverloadCandidate *Cand) {
Index: clang/test/SemaCUDA/function-overload.cu =================================================================== --- clang/test/SemaCUDA/function-overload.cu +++ clang/test/SemaCUDA/function-overload.cu @@ -331,9 +331,6 @@ // If we have a mix of HD and H-only or D-only candidates in the overload set, // normal C++ overload resolution rules apply first. template <typename T> TemplateReturnTy template_vs_hd_function(T arg) -#ifdef __CUDA_ARCH__ -//expected-note@-2 {{declared here}} -#endif { return TemplateReturnTy(); } @@ -342,11 +339,13 @@ } __host__ __device__ void test_host_device_calls_hd_template() { - HostDeviceReturnTy ret1 = template_vs_hd_function(1.0f); - TemplateReturnTy ret2 = template_vs_hd_function(1); #ifdef __CUDA_ARCH__ - // expected-error@-2 {{reference to __host__ function 'template_vs_hd_function<int>' in __host__ __device__ function}} + typedef HostDeviceReturnTy ExpectedReturnTy; +#else + typedef TemplateReturnTy ExpectedReturnTy; #endif + HostDeviceReturnTy ret1 = template_vs_hd_function(1.0f); + ExpectedReturnTy ret2 = template_vs_hd_function(1); } __host__ void test_host_calls_hd_template() { Index: clang/lib/Sema/SemaOverload.cpp =================================================================== --- clang/lib/Sema/SemaOverload.cpp +++ clang/lib/Sema/SemaOverload.cpp @@ -9821,8 +9821,10 @@ llvm::any_of(Candidates, [&](OverloadCandidate *Cand) { // Check viable function only. return Cand->Viable && Cand->Function && - S.IdentifyCUDAPreference(Caller, Cand->Function) == - Sema::CFP_SameSide; + (S.IdentifyCUDAPreference(Caller, Cand->Function) == + Sema::CFP_SameSide || + S.IdentifyCUDAPreference(Caller, Cand->Function) == + Sema::CFP_HostDevice); }); if (ContainsSameSideCandidate) { auto IsWrongSideCandidate = [&](OverloadCandidate *Cand) {
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits