yaxunl created this revision.
yaxunl added reviewers: tra, rsmith.
Herald added subscribers: mattd, carlosgalvezp.
Herald added a project: All.
yaxunl requested review of this revision.

nvcc allows using std::malloc and std::free in device code.
When std::malloc or std::free is passed as a template
function argument with template argument deduction,
there is no diagnostics. e.g.

  #include <memory>
  
  __global__ void kern() {
      void *p = std::malloc(1);
      std::free(p);
  }
  int main()
  {
  
      std::shared_ptr<float> a;
      a = std::shared_ptr<float>(
        (float*)std::malloc(sizeof(float) * 100),
        std::free
      );
      return 0;
  }

However, the same code fails to compile with clang
(https://godbolt.org/z/1roGvo6YY). The reason is
that clang does not have logic to choose a function
argument from an overloaded set of candidates
based on host/device attributes for template argument
deduction.

Currently, clang does have a logic to choose a candidate
based on the constraints of the candidates. This patch
extends that logic to account for the CUDA host/device-based
preference.


https://reviews.llvm.org/D154300

Files:
  clang/lib/Sema/SemaOverload.cpp
  clang/test/SemaCUDA/template-arg-deduction.cu

Index: clang/test/SemaCUDA/template-arg-deduction.cu
===================================================================
--- /dev/null
+++ clang/test/SemaCUDA/template-arg-deduction.cu
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fsyntax-only -fcuda-is-device -verify %s
+
+// expected-no-diagnostics
+
+#include "Inputs/cuda.h"
+
+void foo();
+__device__ void foo();
+
+template<class F>
+void host_temp(F f);
+
+template<class F>
+__device__ void device_temp(F f);
+
+void host_caller() {
+  host_temp(foo);
+}
+
+__global__ void kernel_caller() {
+  device_temp(foo);
+}
+
+__device__ void device_caller() {
+  device_temp(foo);
+}
Index: clang/lib/Sema/SemaOverload.cpp
===================================================================
--- clang/lib/Sema/SemaOverload.cpp
+++ clang/lib/Sema/SemaOverload.cpp
@@ -12697,6 +12697,20 @@
   DeclAccessPair DAP;
   SmallVector<FunctionDecl *, 2> AmbiguousDecls;
 
+  auto CheckCUDAPreference = [&](FunctionDecl *FD1,
+                                 FunctionDecl *FD2) -> std::optional<bool> {
+    FunctionDecl *Caller = getCurFunctionDecl(/*AllowLambda=*/true);
+    int Preference1 = IdentifyCUDAPreference(Caller, FD1);
+    int Preference2 = IdentifyCUDAPreference(Caller, FD2);
+    if (Preference1 > Preference2) {
+      return true;
+    } else if (Preference1 < Preference2) {
+      return false;
+    } else {
+      return std::nullopt;
+    }
+  };
+
   auto CheckMoreConstrained = [&](FunctionDecl *FD1,
                                   FunctionDecl *FD2) -> std::optional<bool> {
     if (FunctionDecl *MF = FD1->getInstantiatedFromMemberFunction())
@@ -12727,9 +12741,33 @@
     if (!checkAddressOfFunctionIsAvailable(FD))
       continue;
 
+    // If we found a better result, update Result.
+    auto FoundBetter = [&]() {
+      IsResultAmbiguous = false;
+      DAP = I.getPair();
+      Result = FD;
+    };
+
     // We have more than one result - see if it is more constrained than the
     // previous one.
     if (Result) {
+      // Check CUDA preference first. If the candidates have differennt CUDA
+      // preference, choose the one with higher CUDA preference. Otherwise,
+      // choose the one with more constraints.
+      if (getLangOpts().CUDA) {
+        std::optional<bool> MorePreferableByCUDA =
+            CheckCUDAPreference(FD, Result);
+        // If FD has different CUDA preference than Result.
+        if (MorePreferableByCUDA) {
+          // FD is less preferable than Result.
+          if (!*MorePreferableByCUDA)
+            continue;
+          // FD is more preferable than Result.
+          FoundBetter();
+        }
+      }
+      // FD has the same CUDA prefernece than Result. Continue check
+      // constraints.
       std::optional<bool> MoreConstrainedThanPrevious =
           CheckMoreConstrained(FD, Result);
       if (!MoreConstrainedThanPrevious) {
@@ -12741,9 +12779,7 @@
         continue;
       // FD is more constrained - replace Result with it.
     }
-    IsResultAmbiguous = false;
-    DAP = I.getPair();
-    Result = FD;
+    FoundBetter();
   }
 
   if (IsResultAmbiguous)
@@ -12753,9 +12789,15 @@
     SmallVector<const Expr *, 1> ResultAC;
     // We skipped over some ambiguous declarations which might be ambiguous with
     // the selected result.
-    for (FunctionDecl *Skipped : AmbiguousDecls)
+    for (FunctionDecl *Skipped : AmbiguousDecls) {
+      // If skipped candidate has different CUDA preference than the result,
+      // there is no ambiguity. Otherwise check whether they have different
+      // constraints.
+      if (getLangOpts().CUDA && CheckCUDAPreference(Skipped, Result))
+        continue;
       if (!CheckMoreConstrained(Skipped, Result))
         return nullptr;
+    }
     Pair = DAP;
   }
   return Result;
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
  • [PATCH] D154300: [CUDA][HIP... Yaxun Liu via Phabricator via cfe-commits

Reply via email to