[clang] bf6a26b - Revert D77954 -- it breaks Eigen & Tensorflow.

Artem Belevich via cfe-commits Tue, 05 May 2020 14:08:43 -0700

Author: Artem Belevich
Date: 2020-05-05T14:07:31-07:00
New Revision: bf6a26b066382e0f41bf023c781d84061c542307


URL: 
https://github.com/llvm/llvm-project/commit/bf6a26b066382e0f41bf023c781d84061c542307
DIFF: 
https://github.com/llvm/llvm-project/commit/bf6a26b066382e0f41bf023c781d84061c542307.diff

LOG: Revert D77954 -- it breaks Eigen & Tensorflow.

This reverts commit 55bcb96f3154808bcb5afc3fb46d8e00bf1db847.

Added: 
    

Modified: 
    clang/lib/Sema/SemaOverload.cpp
    clang/test/SemaCUDA/function-overload.cu

Removed: 
    


################################################################################
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index 57b650de3fee..c400d47dd2bd 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -9374,22 +9374,16 @@ static Comparison compareEnableIfAttrs(const Sema &S, const FunctionDecl *Cand1,
   return Comparison::Equal;
 }
 
-static Comparison
-isBetterMultiversionCandidate(const OverloadCandidate &Cand1,
-                              const OverloadCandidate &Cand2) {
+static bool isBetterMultiversionCandidate(const OverloadCandidate &Cand1,
+                                          const OverloadCandidate &Cand2) {
   if (!Cand1.Function || !Cand1.Function->isMultiVersion() || !Cand2.Function ||
       !Cand2.Function->isMultiVersion())
-    return Comparison::Equal;
+    return false;
 
-  // If both are invalid, they are equal. If one of them is invalid, the other
-  // is better.
-  if (Cand1.Function->isInvalidDecl()) {
-    if (Cand2.Function->isInvalidDecl())
-      return Comparison::Equal;
-    return Comparison::Worse;
-  }
-  if (Cand2.Function->isInvalidDecl())
-    return Comparison::Better;
+  // If Cand1 is invalid, it cannot be a better match, if Cand2 is invalid, this
+  // is obviously better.
+  if (Cand1.Function->isInvalidDecl()) return false;
+  if (Cand2.Function->isInvalidDecl()) return true;
 
   // If this is a cpu_dispatch/cpu_specific multiversion situation, prefer
   // cpu_dispatch, else arbitrarily based on the identifiers.
@@ -9399,18 +9393,16 @@ isBetterMultiversionCandidate(const OverloadCandidate &Cand1,
   const auto *Cand2CPUSpec = Cand2.Function->getAttr<CPUSpecificAttr>();
 
   if (!Cand1CPUDisp && !Cand2CPUDisp && !Cand1CPUSpec && !Cand2CPUSpec)
-    return Comparison::Equal;
+    return false;
 
   if (Cand1CPUDisp && !Cand2CPUDisp)
-    return Comparison::Better;
+    return true;
   if (Cand2CPUDisp && !Cand1CPUDisp)
-    return Comparison::Worse;
+    return false;
 
   if (Cand1CPUSpec && Cand2CPUSpec) {
     if (Cand1CPUSpec->cpus_size() != Cand2CPUSpec->cpus_size())
-      return Cand1CPUSpec->cpus_size() < Cand2CPUSpec->cpus_size()
-                 ? Comparison::Better
-                 : Comparison::Worse;
+      return Cand1CPUSpec->cpus_size() < Cand2CPUSpec->cpus_size();
 
     std::pair<CPUSpecificAttr::cpus_iterator, CPUSpecificAttr::cpus_iterator>
         FirstDiff = std::mismatch(
@@ -9423,9 +9415,7 @@ isBetterMultiversionCandidate(const OverloadCandidate &Cand1,
     assert(FirstDiff.first != Cand1CPUSpec->cpus_end() &&
           "Two different cpu-specific versions should not have the same "
            "identifier list, otherwise they'd be the same decl!");
-    return (*FirstDiff.first)->getName() < (*FirstDiff.second)->getName()
-               ? Comparison::Better
-               : Comparison::Worse;
+    return (*FirstDiff.first)->getName() < (*FirstDiff.second)->getName();
   }
   llvm_unreachable("No way to get here unless both had cpu_dispatch");
 }
@@ -9485,50 +9475,6 @@ bool clang::isBetterOverloadCandidate(
   else if (!Cand1.Viable)
     return false;
 
-  // [CUDA] A function with 'never' preference is marked not viable, therefore
-  // is never shown up here. The worst preference shown up here is 'wrong 
side',
-  // e.g. a host function called by a device host function in device
-  // compilation. This is valid AST as long as the host device function is not
-  // emitted, e.g. it is an inline function which is called only by a host
-  // function. A deferred diagnostic will be triggered if it is emitted.
-  // However a wrong-sided function is still a viable candidate here.
-  //
-  // If Cand1 can be emitted and Cand2 cannot be emitted in the current
-  // context, Cand1 is better than Cand2. If Cand1 can not be emitted and Cand2
-  // can be emitted, Cand1 is not better than Cand2. This rule should have
-  // precedence over other rules.
-  //
-  // If both Cand1 and Cand2 can be emitted, or neither can be emitted, then
-  // other rules should be used to determine which is better. This is because
-  // host/device based overloading resolution is mostly for determining
-  // viability of a function. If two functions are both viable, other factors
-  // should take precedence in preference, e.g. the standard-defined 
preferences
-  // like argument conversion ranks or enable_if partial-ordering. The
-  // preference for pass-object-size parameters is probably most similar to a
-  // type-based-overloading decision and so should take priority.
-  //
-  // If other rules cannot determine which is better, CUDA preference will be
-  // used again to determine which is better.
-  //
-  // TODO: Currently IdentifyCUDAPreference does not return correct values
-  // for functions called in global variable initializers due to missing
-  // correct context about device/host. Therefore we can only enforce this
-  // rule when there is a caller. We should enforce this rule for functions
-  // in global variable initializers once proper context is added.
-  if (S.getLangOpts().CUDA && Cand1.Function && Cand2.Function) {
-    if (FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext)) {
-      auto P1 = S.IdentifyCUDAPreference(Caller, Cand1.Function);
-      auto P2 = S.IdentifyCUDAPreference(Caller, Cand2.Function);
-      assert(P1 != Sema::CFP_Never && P2 != Sema::CFP_Never);
-      auto Cand1Emittable = P1 > Sema::CFP_WrongSide;
-      auto Cand2Emittable = P2 > Sema::CFP_WrongSide;
-      if (Cand1Emittable && !Cand2Emittable)
-        return true;
-      if (!Cand1Emittable && Cand2Emittable)
-        return false;
-    }
-  }
-
   // C++ [over.match.best]p1:
   //
   //   -- if F is a static member function, ICS1(F) is defined such
@@ -9763,6 +9709,12 @@ bool clang::isBetterOverloadCandidate(
       return Cmp == Comparison::Better;
   }
 
+  if (S.getLangOpts().CUDA && Cand1.Function && Cand2.Function) {
+    FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext);
+    return S.IdentifyCUDAPreference(Caller, Cand1.Function) >
+           S.IdentifyCUDAPreference(Caller, Cand2.Function);
+  }
+
   bool HasPS1 = Cand1.Function != nullptr &&
                 functionHasPassObjectSizeParams(Cand1.Function);
   bool HasPS2 = Cand2.Function != nullptr &&
@@ -9770,21 +9722,7 @@ bool clang::isBetterOverloadCandidate(
   if (HasPS1 != HasPS2 && HasPS1)
     return true;
 
-  auto MV = isBetterMultiversionCandidate(Cand1, Cand2);
-  if (MV == Comparison::Better)
-    return true;
-  if (MV == Comparison::Worse)
-    return false;
-
-  // If other rules cannot determine which is better, CUDA preference is used
-  // to determine which is better.
-  if (S.getLangOpts().CUDA && Cand1.Function && Cand2.Function) {
-    FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext);
-    return S.IdentifyCUDAPreference(Caller, Cand1.Function) >
-           S.IdentifyCUDAPreference(Caller, Cand2.Function);
-  }
-
-  return false;
+  return isBetterMultiversionCandidate(Cand1, Cand2);
 }
 
 /// Determine whether two declarations are "equivalent" for the purposes of
@@ -9870,6 +9808,33 @@ OverloadCandidateSet::BestViableFunction(Sema &S, SourceLocation Loc,
   std::transform(begin(), end(), std::back_inserter(Candidates),
                  [](OverloadCandidate &Cand) { return &Cand; });
 
+  // [CUDA] HD->H or HD->D calls are technically not allowed by CUDA but
+  // are accepted by both clang and NVCC. However, during a particular
+  // compilation mode only one call variant is viable. We need to
+  // exclude non-viable overload candidates from consideration based
+  // only on their host/device attributes. Specifically, if one
+  // candidate call is WrongSide and the other is SameSide, we ignore
+  // the WrongSide candidate.
+  if (S.getLangOpts().CUDA) {
+    const FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext);
+    bool ContainsSameSideCandidate =
+        llvm::any_of(Candidates, [&](OverloadCandidate *Cand) {
+          // Check viable function only.
+          return Cand->Viable && Cand->Function &&
+                 S.IdentifyCUDAPreference(Caller, Cand->Function) ==
+                     Sema::CFP_SameSide;
+        });
+    if (ContainsSameSideCandidate) {
+      auto IsWrongSideCandidate = [&](OverloadCandidate *Cand) {
+        // Check viable function only to avoid unnecessary data copying/moving.
+        return Cand->Viable && Cand->Function &&
+               S.IdentifyCUDAPreference(Caller, Cand->Function) ==
+                   Sema::CFP_WrongSide;
+      };
+      llvm::erase_if(Candidates, IsWrongSideCandidate);
+    }
+  }
+
   // Find the best viable function.
   Best = end();
   for (auto *Cand : Candidates) {

diff --git a/clang/test/SemaCUDA/function-overload.cu b/clang/test/SemaCUDA/function-overload.cu
index 612d954b79af..b9efd1c09e69 100644
--- a/clang/test/SemaCUDA/function-overload.cu
+++ b/clang/test/SemaCUDA/function-overload.cu
@@ -1,8 +1,8 @@
 // REQUIRES: x86-registered-target
 // REQUIRES: nvptx-registered-target
 
-// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu -fsyntax-only -verify %s
-// RUN: %clang_cc1 -std=c++11 -triple nvptx64-nvidia-cuda -fsyntax-only -fcuda-is-device -verify %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fsyntax-only -fcuda-is-device -verify %s
 
 #include "Inputs/cuda.h"
 
@@ -331,6 +331,9 @@ __device__ void test_device_calls_template_fn() {
 // If we have a mix of HD and H-only or D-only candidates in the overload set,
 // normal C++ overload resolution rules apply first.
 template <typename T> TemplateReturnTy template_vs_hd_function(T arg)
+#ifdef __CUDA_ARCH__
+//expected-note@-2 {{declared here}}
+#endif
 {
   return TemplateReturnTy();
 }
@@ -339,13 +342,11 @@ __host__ __device__ HostDeviceReturnTy 
template_vs_hd_function(float arg) {
 }
 
 __host__ __device__ void test_host_device_calls_hd_template() {
+  HostDeviceReturnTy ret1 = template_vs_hd_function(1.0f);
+  TemplateReturnTy ret2 = template_vs_hd_function(1);
 #ifdef __CUDA_ARCH__
-  typedef HostDeviceReturnTy ExpectedReturnTy;
-#else
-  typedef TemplateReturnTy ExpectedReturnTy;
+  // expected-error@-2 {{reference to __host__ function 
'template_vs_hd_function<int>' in __host__ __device__ function}}
 #endif
-  HostDeviceReturnTy ret1 = template_vs_hd_function(1.0f);
-  ExpectedReturnTy ret2 = template_vs_hd_function(1);
 }
 
 __host__ void test_host_calls_hd_template() {
@@ -366,14 +367,14 @@ __device__ void test_device_calls_hd_template() {
 __device__ DeviceReturnTy device_only_function(int arg) { return 
DeviceReturnTy(); }
 __device__ DeviceReturnTy2 device_only_function(float arg) { return 
DeviceReturnTy2(); }
 #ifndef __CUDA_ARCH__
-  // expected-note@-3 2{{'device_only_function' declared here}}
-  // expected-note@-3 2{{'device_only_function' declared here}}
+  // expected-note@-3 {{'device_only_function' declared here}}
+  // expected-note@-3 {{'device_only_function' declared here}}
 #endif
 __host__ HostReturnTy host_only_function(int arg) { return HostReturnTy(); }
 __host__ HostReturnTy2 host_only_function(float arg) { return HostReturnTy2(); 
}
 #ifdef __CUDA_ARCH__
-  // expected-note@-3 2{{'host_only_function' declared here}}
-  // expected-note@-3 2{{'host_only_function' declared here}}
+  // expected-note@-3 {{'host_only_function' declared here}}
+  // expected-note@-3 {{'host_only_function' declared here}}
 #endif
 
 __host__ __device__ void test_host_device_single_side_overloading() {
@@ -391,37 +392,6 @@ __host__ __device__ void 
test_host_device_single_side_overloading() {
 #endif
 }
 
-// wrong-sided overloading should not cause diagnostic unless it is emitted.
-// This inline function is not emitted.
-inline __host__ __device__ void 
test_host_device_wrong_side_overloading_inline_no_diag() {
-  DeviceReturnTy ret1 = device_only_function(1);
-  DeviceReturnTy2 ret2 = device_only_function(1.0f);
-  HostReturnTy ret3 = host_only_function(1);
-  HostReturnTy2 ret4 = host_only_function(1.0f);
-}
-
-// wrong-sided overloading should cause diagnostic if it is emitted.
-// This inline function is emitted since it is called by an emitted function.
-inline __host__ __device__ void 
test_host_device_wrong_side_overloading_inline_diag() {
-  DeviceReturnTy ret1 = device_only_function(1);
-  DeviceReturnTy2 ret2 = device_only_function(1.0f);
-#ifndef __CUDA_ARCH__
-  // expected-error@-3 {{reference to __device__ function 
'device_only_function' in __host__ __device__ function}}
-  // expected-error@-3 {{reference to __device__ function 
'device_only_function' in __host__ __device__ function}}
-#endif
-  HostReturnTy ret3 = host_only_function(1);
-  HostReturnTy2 ret4 = host_only_function(1.0f);
-#ifdef __CUDA_ARCH__
-  // expected-error@-3 {{reference to __host__ function 'host_only_function' 
in __host__ __device__ function}}
-  // expected-error@-3 {{reference to __host__ function 'host_only_function' 
in __host__ __device__ function}}
-#endif
-}
-
-__host__ __device__ void 
test_host_device_wrong_side_overloading_inline_diag_caller() {
-  test_host_device_wrong_side_overloading_inline_diag();
-  // expected-note@-1 {{called by 
'test_host_device_wrong_side_overloading_inline_diag_caller'}}
-}
-
 // Verify that we allow overloading function templates.
 template <typename T> __host__ T template_overload(const T &a) { return a; };
 template <typename T> __device__ T template_overload(const T &a) { return a; };
@@ -449,17 +419,3 @@ __host__ __device__ int constexpr_overload(const T &x, 
const T &y) {
 int test_constexpr_overload(C2 &x, C2 &y) {
   return constexpr_overload(x, y);
 }
-
-// Verify no ambiguity for new operator.
-void *a = new int;
-__device__ void *b = new int;
-// expected-error@-1{{dynamic initialization is not supported for __device__, 
__constant__, and __shared__ variables.}}
-
-// Verify no ambiguity for new operator.
-template<typename _Tp> _Tp&& f();
-template<typename _Tp, typename = decltype(new _Tp(f<_Tp>()))>
-void __test();
-
-void foo() {
-  __test<int>();
-}


        
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] bf6a26b - Revert D77954 -- it breaks Eigen & Tensorflow.

Reply via email to