Author: Artem Belevich Date: 2020-05-05T14:07:31-07:00 New Revision: bf6a26b066382e0f41bf023c781d84061c542307
URL: https://github.com/llvm/llvm-project/commit/bf6a26b066382e0f41bf023c781d84061c542307 DIFF: https://github.com/llvm/llvm-project/commit/bf6a26b066382e0f41bf023c781d84061c542307.diff LOG: Revert D77954 -- it breaks Eigen & Tensorflow. This reverts commit 55bcb96f3154808bcb5afc3fb46d8e00bf1db847. Added: Modified: clang/lib/Sema/SemaOverload.cpp clang/test/SemaCUDA/function-overload.cu Removed: ################################################################################ diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index 57b650de3fee..c400d47dd2bd 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -9374,22 +9374,16 @@ static Comparison compareEnableIfAttrs(const Sema &S, const FunctionDecl *Cand1, return Comparison::Equal; } -static Comparison -isBetterMultiversionCandidate(const OverloadCandidate &Cand1, - const OverloadCandidate &Cand2) { +static bool isBetterMultiversionCandidate(const OverloadCandidate &Cand1, + const OverloadCandidate &Cand2) { if (!Cand1.Function || !Cand1.Function->isMultiVersion() || !Cand2.Function || !Cand2.Function->isMultiVersion()) - return Comparison::Equal; + return false; - // If both are invalid, they are equal. If one of them is invalid, the other - // is better. - if (Cand1.Function->isInvalidDecl()) { - if (Cand2.Function->isInvalidDecl()) - return Comparison::Equal; - return Comparison::Worse; - } - if (Cand2.Function->isInvalidDecl()) - return Comparison::Better; + // If Cand1 is invalid, it cannot be a better match, if Cand2 is invalid, this + // is obviously better. + if (Cand1.Function->isInvalidDecl()) return false; + if (Cand2.Function->isInvalidDecl()) return true; // If this is a cpu_dispatch/cpu_specific multiversion situation, prefer // cpu_dispatch, else arbitrarily based on the identifiers. @@ -9399,18 +9393,16 @@ isBetterMultiversionCandidate(const OverloadCandidate &Cand1, const auto *Cand2CPUSpec = Cand2.Function->getAttr<CPUSpecificAttr>(); if (!Cand1CPUDisp && !Cand2CPUDisp && !Cand1CPUSpec && !Cand2CPUSpec) - return Comparison::Equal; + return false; if (Cand1CPUDisp && !Cand2CPUDisp) - return Comparison::Better; + return true; if (Cand2CPUDisp && !Cand1CPUDisp) - return Comparison::Worse; + return false; if (Cand1CPUSpec && Cand2CPUSpec) { if (Cand1CPUSpec->cpus_size() != Cand2CPUSpec->cpus_size()) - return Cand1CPUSpec->cpus_size() < Cand2CPUSpec->cpus_size() - ? Comparison::Better - : Comparison::Worse; + return Cand1CPUSpec->cpus_size() < Cand2CPUSpec->cpus_size(); std::pair<CPUSpecificAttr::cpus_iterator, CPUSpecificAttr::cpus_iterator> FirstDiff = std::mismatch( @@ -9423,9 +9415,7 @@ isBetterMultiversionCandidate(const OverloadCandidate &Cand1, assert(FirstDiff.first != Cand1CPUSpec->cpus_end() && "Two diff erent cpu-specific versions should not have the same " "identifier list, otherwise they'd be the same decl!"); - return (*FirstDiff.first)->getName() < (*FirstDiff.second)->getName() - ? Comparison::Better - : Comparison::Worse; + return (*FirstDiff.first)->getName() < (*FirstDiff.second)->getName(); } llvm_unreachable("No way to get here unless both had cpu_dispatch"); } @@ -9485,50 +9475,6 @@ bool clang::isBetterOverloadCandidate( else if (!Cand1.Viable) return false; - // [CUDA] A function with 'never' preference is marked not viable, therefore - // is never shown up here. The worst preference shown up here is 'wrong side', - // e.g. a host function called by a device host function in device - // compilation. This is valid AST as long as the host device function is not - // emitted, e.g. it is an inline function which is called only by a host - // function. A deferred diagnostic will be triggered if it is emitted. - // However a wrong-sided function is still a viable candidate here. - // - // If Cand1 can be emitted and Cand2 cannot be emitted in the current - // context, Cand1 is better than Cand2. If Cand1 can not be emitted and Cand2 - // can be emitted, Cand1 is not better than Cand2. This rule should have - // precedence over other rules. - // - // If both Cand1 and Cand2 can be emitted, or neither can be emitted, then - // other rules should be used to determine which is better. This is because - // host/device based overloading resolution is mostly for determining - // viability of a function. If two functions are both viable, other factors - // should take precedence in preference, e.g. the standard-defined preferences - // like argument conversion ranks or enable_if partial-ordering. The - // preference for pass-object-size parameters is probably most similar to a - // type-based-overloading decision and so should take priority. - // - // If other rules cannot determine which is better, CUDA preference will be - // used again to determine which is better. - // - // TODO: Currently IdentifyCUDAPreference does not return correct values - // for functions called in global variable initializers due to missing - // correct context about device/host. Therefore we can only enforce this - // rule when there is a caller. We should enforce this rule for functions - // in global variable initializers once proper context is added. - if (S.getLangOpts().CUDA && Cand1.Function && Cand2.Function) { - if (FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext)) { - auto P1 = S.IdentifyCUDAPreference(Caller, Cand1.Function); - auto P2 = S.IdentifyCUDAPreference(Caller, Cand2.Function); - assert(P1 != Sema::CFP_Never && P2 != Sema::CFP_Never); - auto Cand1Emittable = P1 > Sema::CFP_WrongSide; - auto Cand2Emittable = P2 > Sema::CFP_WrongSide; - if (Cand1Emittable && !Cand2Emittable) - return true; - if (!Cand1Emittable && Cand2Emittable) - return false; - } - } - // C++ [over.match.best]p1: // // -- if F is a static member function, ICS1(F) is defined such @@ -9763,6 +9709,12 @@ bool clang::isBetterOverloadCandidate( return Cmp == Comparison::Better; } + if (S.getLangOpts().CUDA && Cand1.Function && Cand2.Function) { + FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext); + return S.IdentifyCUDAPreference(Caller, Cand1.Function) > + S.IdentifyCUDAPreference(Caller, Cand2.Function); + } + bool HasPS1 = Cand1.Function != nullptr && functionHasPassObjectSizeParams(Cand1.Function); bool HasPS2 = Cand2.Function != nullptr && @@ -9770,21 +9722,7 @@ bool clang::isBetterOverloadCandidate( if (HasPS1 != HasPS2 && HasPS1) return true; - auto MV = isBetterMultiversionCandidate(Cand1, Cand2); - if (MV == Comparison::Better) - return true; - if (MV == Comparison::Worse) - return false; - - // If other rules cannot determine which is better, CUDA preference is used - // to determine which is better. - if (S.getLangOpts().CUDA && Cand1.Function && Cand2.Function) { - FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext); - return S.IdentifyCUDAPreference(Caller, Cand1.Function) > - S.IdentifyCUDAPreference(Caller, Cand2.Function); - } - - return false; + return isBetterMultiversionCandidate(Cand1, Cand2); } /// Determine whether two declarations are "equivalent" for the purposes of @@ -9870,6 +9808,33 @@ OverloadCandidateSet::BestViableFunction(Sema &S, SourceLocation Loc, std::transform(begin(), end(), std::back_inserter(Candidates), [](OverloadCandidate &Cand) { return &Cand; }); + // [CUDA] HD->H or HD->D calls are technically not allowed by CUDA but + // are accepted by both clang and NVCC. However, during a particular + // compilation mode only one call variant is viable. We need to + // exclude non-viable overload candidates from consideration based + // only on their host/device attributes. Specifically, if one + // candidate call is WrongSide and the other is SameSide, we ignore + // the WrongSide candidate. + if (S.getLangOpts().CUDA) { + const FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext); + bool ContainsSameSideCandidate = + llvm::any_of(Candidates, [&](OverloadCandidate *Cand) { + // Check viable function only. + return Cand->Viable && Cand->Function && + S.IdentifyCUDAPreference(Caller, Cand->Function) == + Sema::CFP_SameSide; + }); + if (ContainsSameSideCandidate) { + auto IsWrongSideCandidate = [&](OverloadCandidate *Cand) { + // Check viable function only to avoid unnecessary data copying/moving. + return Cand->Viable && Cand->Function && + S.IdentifyCUDAPreference(Caller, Cand->Function) == + Sema::CFP_WrongSide; + }; + llvm::erase_if(Candidates, IsWrongSideCandidate); + } + } + // Find the best viable function. Best = end(); for (auto *Cand : Candidates) { diff --git a/clang/test/SemaCUDA/function-overload.cu b/clang/test/SemaCUDA/function-overload.cu index 612d954b79af..b9efd1c09e69 100644 --- a/clang/test/SemaCUDA/function-overload.cu +++ b/clang/test/SemaCUDA/function-overload.cu @@ -1,8 +1,8 @@ // REQUIRES: x86-registered-target // REQUIRES: nvptx-registered-target -// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu -fsyntax-only -verify %s -// RUN: %clang_cc1 -std=c++11 -triple nvptx64-nvidia-cuda -fsyntax-only -fcuda-is-device -verify %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsyntax-only -verify %s +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fsyntax-only -fcuda-is-device -verify %s #include "Inputs/cuda.h" @@ -331,6 +331,9 @@ __device__ void test_device_calls_template_fn() { // If we have a mix of HD and H-only or D-only candidates in the overload set, // normal C++ overload resolution rules apply first. template <typename T> TemplateReturnTy template_vs_hd_function(T arg) +#ifdef __CUDA_ARCH__ +//expected-note@-2 {{declared here}} +#endif { return TemplateReturnTy(); } @@ -339,13 +342,11 @@ __host__ __device__ HostDeviceReturnTy template_vs_hd_function(float arg) { } __host__ __device__ void test_host_device_calls_hd_template() { + HostDeviceReturnTy ret1 = template_vs_hd_function(1.0f); + TemplateReturnTy ret2 = template_vs_hd_function(1); #ifdef __CUDA_ARCH__ - typedef HostDeviceReturnTy ExpectedReturnTy; -#else - typedef TemplateReturnTy ExpectedReturnTy; + // expected-error@-2 {{reference to __host__ function 'template_vs_hd_function<int>' in __host__ __device__ function}} #endif - HostDeviceReturnTy ret1 = template_vs_hd_function(1.0f); - ExpectedReturnTy ret2 = template_vs_hd_function(1); } __host__ void test_host_calls_hd_template() { @@ -366,14 +367,14 @@ __device__ void test_device_calls_hd_template() { __device__ DeviceReturnTy device_only_function(int arg) { return DeviceReturnTy(); } __device__ DeviceReturnTy2 device_only_function(float arg) { return DeviceReturnTy2(); } #ifndef __CUDA_ARCH__ - // expected-note@-3 2{{'device_only_function' declared here}} - // expected-note@-3 2{{'device_only_function' declared here}} + // expected-note@-3 {{'device_only_function' declared here}} + // expected-note@-3 {{'device_only_function' declared here}} #endif __host__ HostReturnTy host_only_function(int arg) { return HostReturnTy(); } __host__ HostReturnTy2 host_only_function(float arg) { return HostReturnTy2(); } #ifdef __CUDA_ARCH__ - // expected-note@-3 2{{'host_only_function' declared here}} - // expected-note@-3 2{{'host_only_function' declared here}} + // expected-note@-3 {{'host_only_function' declared here}} + // expected-note@-3 {{'host_only_function' declared here}} #endif __host__ __device__ void test_host_device_single_side_overloading() { @@ -391,37 +392,6 @@ __host__ __device__ void test_host_device_single_side_overloading() { #endif } -// wrong-sided overloading should not cause diagnostic unless it is emitted. -// This inline function is not emitted. -inline __host__ __device__ void test_host_device_wrong_side_overloading_inline_no_diag() { - DeviceReturnTy ret1 = device_only_function(1); - DeviceReturnTy2 ret2 = device_only_function(1.0f); - HostReturnTy ret3 = host_only_function(1); - HostReturnTy2 ret4 = host_only_function(1.0f); -} - -// wrong-sided overloading should cause diagnostic if it is emitted. -// This inline function is emitted since it is called by an emitted function. -inline __host__ __device__ void test_host_device_wrong_side_overloading_inline_diag() { - DeviceReturnTy ret1 = device_only_function(1); - DeviceReturnTy2 ret2 = device_only_function(1.0f); -#ifndef __CUDA_ARCH__ - // expected-error@-3 {{reference to __device__ function 'device_only_function' in __host__ __device__ function}} - // expected-error@-3 {{reference to __device__ function 'device_only_function' in __host__ __device__ function}} -#endif - HostReturnTy ret3 = host_only_function(1); - HostReturnTy2 ret4 = host_only_function(1.0f); -#ifdef __CUDA_ARCH__ - // expected-error@-3 {{reference to __host__ function 'host_only_function' in __host__ __device__ function}} - // expected-error@-3 {{reference to __host__ function 'host_only_function' in __host__ __device__ function}} -#endif -} - -__host__ __device__ void test_host_device_wrong_side_overloading_inline_diag_caller() { - test_host_device_wrong_side_overloading_inline_diag(); - // expected-note@-1 {{called by 'test_host_device_wrong_side_overloading_inline_diag_caller'}} -} - // Verify that we allow overloading function templates. template <typename T> __host__ T template_overload(const T &a) { return a; }; template <typename T> __device__ T template_overload(const T &a) { return a; }; @@ -449,17 +419,3 @@ __host__ __device__ int constexpr_overload(const T &x, const T &y) { int test_constexpr_overload(C2 &x, C2 &y) { return constexpr_overload(x, y); } - -// Verify no ambiguity for new operator. -void *a = new int; -__device__ void *b = new int; -// expected-error@-1{{dynamic initialization is not supported for __device__, __constant__, and __shared__ variables.}} - -// Verify no ambiguity for new operator. -template<typename _Tp> _Tp&& f(); -template<typename _Tp, typename = decltype(new _Tp(f<_Tp>()))> -void __test(); - -void foo() { - __test<int>(); -} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits