hliao created this revision. hliao added reviewers: tra, jlebar, yaxunl. Herald added a project: clang. Herald added a subscriber: cfe-commits. hliao updated this revision to Diff 232933. hliao added a comment. hliao edited the summary of this revision.
refine commit message - As global initializers are not under any function body, overload resolution needs to look at the current variable being initialized. That is not addressed in the current CUDA/HIP overloadable function resolution, which skips target checking in that case. This may result in the wrong candidate being selected, as illustrated in the newly added test case. - In this patch, a non-local variable stack is introduced to keep track of the current non-local variable being initialized, so that the initializer can be inspected for the target preference. - Besides the newly added tests, existing tests are refined because the current implementation adds an extra check on global initializers to ensure no device functions are used. As target match checking is enabled in this patch, such a check is only necessary for CUDA device global variables, which are not allowed to be non-trivially initialized. As HIP starts to support non-trivial initialization of device variables, such target match checking is mandatory and must be enforced. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D71227 Files: clang/include/clang/Sema/Sema.h clang/lib/Parse/ParseDecl.cpp clang/lib/Sema/SemaCUDA.cpp clang/lib/Sema/SemaDeclCXX.cpp clang/lib/Sema/SemaExprCXX.cpp clang/lib/Sema/SemaOverload.cpp clang/test/SemaCUDA/function-overload.cu clang/test/SemaCUDA/global-initializers-host.cu clang/test/SemaCUDA/hip-pinned-shadow.cu
Index: clang/test/SemaCUDA/hip-pinned-shadow.cu =================================================================== --- clang/test/SemaCUDA/hip-pinned-shadow.cu +++ clang/test/SemaCUDA/hip-pinned-shadow.cu @@ -13,13 +13,19 @@ template <class T, int texType, int hipTextureReadMode> struct texture : public textureReference { +// expected-note@-1{{candidate constructor (the implicit copy constructor) not viable: requires 1 argument, but 0 were provided}} +// expected-note@-2{{candidate constructor (the implicit copy constructor) not viable: requires 1 argument, but 0 were provided}} +// expected-note@-3{{candidate constructor (the implicit move constructor) not viable: requires 1 argument, but 0 were provided}} +// expected-note@-4{{candidate constructor (the implicit move constructor) not viable: requires 1 argument, but 0 were provided}} texture() { a = 1; } +// expected-note@-1{{candidate constructor not viable: call to __host__ function from __device__ function}} +// expected-note@-2{{candidate constructor not viable: call to __host__ function from __device__ function}} }; __hip_pinned_shadow__ texture<float, 2, 1> tex; __device__ __hip_pinned_shadow__ texture<float, 2, 1> tex2; // expected-error{{'hip_pinned_shadow' and 'device' attributes are not compatible}} - // expected-error@-1{{dynamic initialization is not supported for __device__, __constant__, and __shared__ variables}} - // expected-note@-2{{conflicting attribute is here}} + // expected-note@-1{{conflicting attribute is here}} + // expected-error@-2{{no matching constructor for initialization of 'texture<float, 2, 1>'}} __constant__ __hip_pinned_shadow__ texture<float, 2, 1> tex3; // expected-error{{'hip_pinned_shadow' and 'constant' attributes are not compatible}} - // expected-error@-1{{dynamic initialization is not supported for __device__, __constant__, and __shared__ variables}} - // expected-note@-2{{conflicting attribute is here}} + // expected-note@-1{{conflicting attribute is here}} + // 
expected-error@-2{{no matching constructor for initialization of 'texture<float, 2, 1>'}} Index: clang/test/SemaCUDA/global-initializers-host.cu =================================================================== --- clang/test/SemaCUDA/global-initializers-host.cu +++ clang/test/SemaCUDA/global-initializers-host.cu @@ -6,12 +6,14 @@ // module initializer. struct S { + // expected-note@-1 {{candidate constructor (the implicit copy constructor) not viable: requires 1 argument, but 0 were provided}} + // expected-note@-2 {{candidate constructor (the implicit move constructor) not viable: requires 1 argument, but 0 were provided}} __device__ S() {} - // expected-note@-1 {{'S' declared here}} + // expected-note@-1 {{candidate constructor not viable: call to __device__ function from __host__ function}} }; S s; -// expected-error@-1 {{reference to __device__ function 'S' in global initializer}} +// expected-error@-1 {{no matching constructor for initialization of 'S'}} struct T { __host__ __device__ T() {} @@ -19,14 +21,17 @@ T t; // No error, this is OK. 
struct U { + // expected-note@-1 {{candidate constructor (the implicit copy constructor) not viable: no known conversion from 'int' to 'const U' for 1st argument}} + // expected-note@-2 {{candidate constructor (the implicit move constructor) not viable: no known conversion from 'int' to 'U' for 1st argument}} __host__ U() {} + // expected-note@-1 {{candidate constructor not viable: requires 0 arguments, but 1 was provided}} __device__ U(int) {} - // expected-note@-1 {{'U' declared here}} + // expected-note@-1 {{candidate constructor not viable: call to __device__ function from __host__ function}} }; U u(42); -// expected-error@-1 {{reference to __device__ function 'U' in global initializer}} +// expected-error@-1 {{no matching constructor for initialization of 'U'}} __device__ int device_fn() { return 42; } -// expected-note@-1 {{'device_fn' declared here}} +// expected-note@-1 {{candidate function not viable: call to __device__ function from __host__ function}} int n = device_fn(); -// expected-error@-1 {{reference to __device__ function 'device_fn' in global initializer}} +// expected-error@-1 {{no matching function for call to 'device_fn'}} Index: clang/test/SemaCUDA/function-overload.cu =================================================================== --- clang/test/SemaCUDA/function-overload.cu +++ clang/test/SemaCUDA/function-overload.cu @@ -214,8 +214,10 @@ // Test for address of overloaded function resolution in the global context. HostFnPtr fp_h = h; HostFnPtr fp_ch = ch; +#if !defined(__CUDA_ARCH__) CurrentFnPtr fp_dh = dh; CurrentFnPtr fp_cdh = cdh; +#endif GlobalFnPtr fp_g = g; @@ -419,3 +421,15 @@ int test_constexpr_overload(C2 &x, C2 &y) { return constexpr_overload(x, y); } + +__device__ float fn(int); +__host__ float fn(float); + +// Overload resolution should follow the same rule in the global +// initialization. 
+float gvar1 = fn(1); + +__device__ float dev_only_fn(int); +// expected-note@-1 {{candidate function not viable: call to __device__ function from __host__ function}} + +float gvar2 = dev_only_fn(1); // expected-error {{no matching function for call to 'dev_only_fn'}} Index: clang/lib/Sema/SemaOverload.cpp =================================================================== --- clang/lib/Sema/SemaOverload.cpp +++ clang/lib/Sema/SemaOverload.cpp @@ -6262,17 +6262,12 @@ } // (CUDA B.1): Check for invalid calls between targets. - if (getLangOpts().CUDA) - if (const FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext)) - // Skip the check for callers that are implicit members, because in this - // case we may not yet know what the member's target is; the target is - // inferred for the member automatically, based on the bases and fields of - // the class. - if (!Caller->isImplicit() && !IsAllowedCUDACall(Caller, Function)) { - Candidate.Viable = false; - Candidate.FailureKind = ovl_fail_bad_target; - return; - } + if (getLangOpts().CUDA && + !isCUDACallAllowed(Function, Sema::SkipImplicitCaller)) { + Candidate.Viable = false; + Candidate.FailureKind = ovl_fail_bad_target; + return; + } // Determine the implicit conversion sequences for each of the // arguments. @@ -6782,13 +6777,11 @@ } // (CUDA B.1): Check for invalid calls between targets. - if (getLangOpts().CUDA) - if (const FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext)) - if (!IsAllowedCUDACall(Caller, Method)) { - Candidate.Viable = false; - Candidate.FailureKind = ovl_fail_bad_target; - return; - } + if (getLangOpts().CUDA && !isCUDACallAllowed(Method)) { + Candidate.Viable = false; + Candidate.FailureKind = ovl_fail_bad_target; + return; + } // Determine the implicit conversion sequences for each of the // arguments. 
@@ -9531,9 +9524,9 @@ } if (S.getLangOpts().CUDA && Cand1.Function && Cand2.Function) { - FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext); - return S.IdentifyCUDAPreference(Caller, Cand1.Function) > - S.IdentifyCUDAPreference(Caller, Cand2.Function); + const Decl *ContextDecl = S.getCUDAContextDecl(); + return S.IdentifyCUDAPreference(ContextDecl, Cand1.Function) > + S.IdentifyCUDAPreference(ContextDecl, Cand2.Function); } bool HasPS1 = Cand1.Function != nullptr && @@ -9638,19 +9631,19 @@ // candidate call is WrongSide and the other is SameSide, we ignore // the WrongSide candidate. if (S.getLangOpts().CUDA) { - const FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext); + const Decl *ContextDecl = S.getCUDAContextDecl(); bool ContainsSameSideCandidate = llvm::any_of(Candidates, [&](OverloadCandidate *Cand) { // Check viable function only. return Cand->Viable && Cand->Function && - S.IdentifyCUDAPreference(Caller, Cand->Function) == + S.IdentifyCUDAPreference(ContextDecl, Cand->Function) == Sema::CFP_SameSide; }); if (ContainsSameSideCandidate) { auto IsWrongSideCandidate = [&](OverloadCandidate *Cand) { // Check viable function only to avoid unnecessary data copying/moving. return Cand->Viable && Cand->Function && - S.IdentifyCUDAPreference(Caller, Cand->Function) == + S.IdentifyCUDAPreference(ContextDecl, Cand->Function) == Sema::CFP_WrongSide; }; llvm::erase_if(Candidates, IsWrongSideCandidate); @@ -10543,10 +10536,10 @@ /// CUDA: diagnose an invalid call across targets. 
static void DiagnoseBadTarget(Sema &S, OverloadCandidate *Cand) { - FunctionDecl *Caller = cast<FunctionDecl>(S.CurContext); + const Decl *ContextDecl = S.getCUDAContextDecl(); FunctionDecl *Callee = Cand->Function; - Sema::CUDAFunctionTarget CallerTarget = S.IdentifyCUDATarget(Caller), + Sema::CUDAFunctionTarget CallerTarget = S.IdentifyCUDATarget(ContextDecl), CalleeTarget = S.IdentifyCUDATarget(Callee); std::string FnDesc; @@ -11536,10 +11529,9 @@ return false; if (FunctionDecl *FunDecl = dyn_cast<FunctionDecl>(Fn)) { - if (S.getLangOpts().CUDA) - if (FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext)) - if (!Caller->isImplicit() && !S.IsAllowedCUDACall(Caller, FunDecl)) - return false; + if (S.getLangOpts().CUDA && + !S.isCUDACallAllowed(FunDecl, Sema::SkipImplicitCaller)) + return false; if (FunDecl->isMultiVersion()) { const auto *TA = FunDecl->getAttr<TargetAttr>(); if (TA && !TA->isDefaultVersion()) @@ -11654,7 +11646,7 @@ } void EliminateSuboptimalCudaMatches() { - S.EraseUnwantedCUDAMatches(dyn_cast<FunctionDecl>(S.CurContext), Matches); + S.EraseUnwantedCUDAMatches(S.getCUDAContextDecl(), Matches); } public: Index: clang/lib/Sema/SemaExprCXX.cpp =================================================================== --- clang/lib/Sema/SemaExprCXX.cpp +++ clang/lib/Sema/SemaExprCXX.cpp @@ -1433,9 +1433,9 @@ bool Sema::isUsualDeallocationFunction(const CXXMethodDecl *Method) { // [CUDA] Ignore this function, if we can't call it. 
- const FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext); + const Decl *ContextDecl = getCUDAContextDecl(); if (getLangOpts().CUDA && - IdentifyCUDAPreference(Caller, Method) <= CFP_WrongSide) + IdentifyCUDAPreference(ContextDecl, Method) <= CFP_WrongSide) return false; SmallVector<const FunctionDecl*, 4> PreventedBy; @@ -1449,7 +1449,7 @@ return llvm::none_of(PreventedBy, [&](const FunctionDecl *FD) { assert(FD->getNumParams() == 1 && "Only single-operand functions should be in PreventedBy"); - return IdentifyCUDAPreference(Caller, FD) >= CFP_HostDevice; + return IdentifyCUDAPreference(ContextDecl, FD) >= CFP_HostDevice; }); } @@ -1512,8 +1512,7 @@ // In CUDA, determine how much we'd like / dislike to call this. if (S.getLangOpts().CUDA) - if (auto *Caller = dyn_cast<FunctionDecl>(S.CurContext)) - CUDAPref = S.IdentifyCUDAPreference(Caller, FD); + CUDAPref = S.IdentifyCUDAPreference(S.getCUDAContextDecl(), FD); } explicit operator bool() const { return FD; } @@ -2554,7 +2553,7 @@ } if (getLangOpts().CUDA) - EraseUnwantedCUDAMatches(dyn_cast<FunctionDecl>(CurContext), Matches); + EraseUnwantedCUDAMatches(getCUDAContextDecl(), Matches); } else { // C++1y [expr.new]p22: // For a non-placement allocation function, the normal deallocation Index: clang/lib/Sema/SemaDeclCXX.cpp =================================================================== --- clang/lib/Sema/SemaDeclCXX.cpp +++ clang/lib/Sema/SemaDeclCXX.cpp @@ -16213,6 +16213,20 @@ return false; } +void Sema::pushCUDANonLocalVariable(const Decl *D) { + if (!D || D->isInvalidDecl() || !isNonlocalVariable(D)) + return; + CUDANonLocalVariableStack.push_back(D); +} + +void Sema::popCUDANonLocalVariable(const Decl *D) { + if (!D || D->isInvalidDecl() || !isNonlocalVariable(D)) + return; + assert(!CUDANonLocalVariableStack.empty() && + CUDANonLocalVariableStack.back() == D); + CUDANonLocalVariableStack.pop_back(); +} + /// Invoked when we are about to parse an initializer for the declaration /// 'Dcl'. 
/// Index: clang/lib/Sema/SemaCUDA.cpp =================================================================== --- clang/lib/Sema/SemaCUDA.cpp +++ clang/lib/Sema/SemaCUDA.cpp @@ -95,7 +95,7 @@ } template <typename A> -static bool hasAttr(const FunctionDecl *D, bool IgnoreImplicitAttr) { +static bool hasAttr(const Decl *D, bool IgnoreImplicitAttr) { return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) { return isa<A>(Attribute) && !(IgnoreImplicitAttr && Attribute->isImplicit()); @@ -130,6 +130,41 @@ return CFT_Host; } +Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const VarDecl *D, + bool IgnoreImplicitHDAttr) { + if (D == nullptr) + return CFT_Host; + + assert(D->hasGlobalStorage() && "Only non-local variable needs identifying."); + + if (D->hasAttr<CUDAInvalidTargetAttr>()) + return CFT_InvalidTarget; + + if (hasAttr<HIPPinnedShadowAttr>(D, IgnoreImplicitHDAttr)) + return CFT_Host; + + if (hasAttr<CUDAConstantAttr>(D, IgnoreImplicitHDAttr) || + hasAttr<CUDADeviceAttr>(D, IgnoreImplicitHDAttr) || + hasAttr<CUDASharedAttr>(D, IgnoreImplicitHDAttr)) + return CFT_Device; + + return CFT_Host; +} + +Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const Decl *D, + bool IgnoreImplicitHDAttr) { + if (D == nullptr) + return CFT_Host; + + if (auto FD = dyn_cast<FunctionDecl>(D)) + return IdentifyCUDATarget(FD, IgnoreImplicitHDAttr); + + if (auto VD = dyn_cast<VarDecl>(D)) + return IdentifyCUDATarget(VD, IgnoreImplicitHDAttr); + + llvm_unreachable("Unexpected decl for CUDA target identification."); +} + // * CUDA Call preference table // // F - from, @@ -159,10 +194,10 @@ // | hd | hd | HD | HD | (b) | Sema::CUDAFunctionPreference -Sema::IdentifyCUDAPreference(const FunctionDecl *Caller, +Sema::IdentifyCUDAPreference(const Decl *ContextDecl, const FunctionDecl *Callee) { assert(Callee && "Callee must be valid."); - CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(Caller); + CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(ContextDecl); 
CUDAFunctionTarget CalleeTarget = IdentifyCUDATarget(Callee); // If one of the targets is invalid, the check always fails, no matter what @@ -211,16 +246,16 @@ } void Sema::EraseUnwantedCUDAMatches( - const FunctionDecl *Caller, + const Decl *ContextDecl, SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches) { if (Matches.size() <= 1) return; using Pair = std::pair<DeclAccessPair, FunctionDecl*>; - // Gets the CUDA function preference for a call from Caller to Match. + // Gets the CUDA function preference for a call from call context to Match. auto GetCFP = [&](const Pair &Match) { - return IdentifyCUDAPreference(Caller, Match.second); + return IdentifyCUDAPreference(ContextDecl, Match.second); }; // Find the best call preference among the functions in Matches. Index: clang/lib/Parse/ParseDecl.cpp =================================================================== --- clang/lib/Parse/ParseDecl.cpp +++ clang/lib/Parse/ParseDecl.cpp @@ -2333,6 +2333,8 @@ } } + Actions.pushCUDANonLocalVariable(ThisDecl); + // Parse declarator '=' initializer. // If a '==' or '+=' is found, suggest a fixit to '='. if (isTokenEqualOrEqualTypo()) { @@ -2464,6 +2466,8 @@ Actions.ActOnUninitializedDecl(ThisDecl); } + Actions.popCUDANonLocalVariable(ThisDecl); + Actions.FinalizeDeclaration(ThisDecl); return ThisDecl; Index: clang/include/clang/Sema/Sema.h =================================================================== --- clang/include/clang/Sema/Sema.h +++ clang/include/clang/Sema/Sema.h @@ -11032,6 +11032,10 @@ /// will get it wrong. Returns CFT_Host if D is null. CUDAFunctionTarget IdentifyCUDATarget(const FunctionDecl *D, bool IgnoreImplicitHDAttr = false); + CUDAFunctionTarget IdentifyCUDATarget(const VarDecl *D, + bool IgnoreImplicitHDAttr = false); + CUDAFunctionTarget IdentifyCUDATarget(const Decl *D, + bool IgnoreImplicitHDAttr = false); CUDAFunctionTarget IdentifyCUDATarget(const ParsedAttributesView &Attrs); /// Gets the CUDA target for the current context. 
@@ -11059,17 +11063,40 @@ /// \param Callee target function /// /// \returns preference value for particular Caller/Callee combination. - CUDAFunctionPreference IdentifyCUDAPreference(const FunctionDecl *Caller, + CUDAFunctionPreference IdentifyCUDAPreference(const Decl *CallContextDecl, const FunctionDecl *Callee); + SmallVector<const Decl *, 8> CUDANonLocalVariableStack; + + void pushCUDANonLocalVariable(const Decl *D); + void popCUDANonLocalVariable(const Decl *D); + const Decl *getCUDACurrentNonLocalVariable() const { + return CUDANonLocalVariableStack.empty() ? nullptr + : CUDANonLocalVariableStack.back(); + } + + const Decl *getCUDAContextDecl() const { + const Decl *ContextDecl = dyn_cast<FunctionDecl>(CurContext); + if (!ContextDecl) + ContextDecl = getCUDACurrentNonLocalVariable(); + return ContextDecl; + } + /// Determines whether Caller may invoke Callee, based on their CUDA /// host/device attributes. Returns false if the call is not allowed. /// /// Note: Will return true for CFP_WrongSide calls. These may appear in /// semantically correct CUDA programs, but only if they're never codegen'ed. - bool IsAllowedCUDACall(const FunctionDecl *Caller, - const FunctionDecl *Callee) { - return IdentifyCUDAPreference(Caller, Callee) != CFP_Never; + enum SkipCallerKind_t { SkipNoneCaller, SkipImplicitCaller }; + bool isCUDACallAllowed(const FunctionDecl *Callee, + SkipCallerKind_t Kind = SkipNoneCaller) { + // Skip contexts where no real call could be performed. + if (!CurContext->isFileContext() && !CurContext->isFunctionOrMethod()) + return true; + const FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext); + if (Kind == SkipImplicitCaller && Caller && Caller->isImplicit()) + return true; + return IdentifyCUDAPreference(getCUDAContextDecl(), Callee) != CFP_Never; } /// May add implicit CUDAHostAttr and CUDADeviceAttr attributes to FD, @@ -11106,7 +11133,7 @@ /// from \p Caller context and erases all functions with lower /// calling priority. 
void EraseUnwantedCUDAMatches( - const FunctionDecl *Caller, + const Decl *ContextDecl, SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches); /// Given a implicit special member, infer its CUDA target from the
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits