yaxunl updated this revision to Diff 304494.
yaxunl edited the summary of this revision.
yaxunl added a comment.

added diagnosing referencing host variable in device functions


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D91088/new/

https://reviews.llvm.org/D91088

Files:
  clang/include/clang/Basic/DiagnosticSemaKinds.td
  clang/lib/CodeGen/CGExpr.cpp
  clang/lib/Sema/SemaCUDA.cpp
  clang/lib/Sema/SemaExpr.cpp
  clang/test/CodeGenCUDA/function-overload.cu
  clang/test/CodeGenCUDA/lambda-reference-var.cu
  clang/test/SemaCUDA/device-use-host-var.cu

Index: clang/test/SemaCUDA/device-use-host-var.cu
===================================================================
--- /dev/null
+++ clang/test/SemaCUDA/device-use-host-var.cu
@@ -0,0 +1,160 @@
+// RUN: %clang_cc1 -std=c++11 -fcuda-is-device -fsyntax-only -verify=dev %s
+// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify=host %s
+
+// host-no-diagnostics
+
+#include "Inputs/cuda.h"
+
+int global_host_var;
+__device__ int global_dev_var;
+__constant__ int global_constant_var;
+__shared__ int global_shared_var;
+constexpr int global_constexpr_var = 1;
+const int global_const_var = 1;
+
+template<typename F>
+__global__ void kernel(F f) { f(); } // dev-note2 {{called by 'kernel<(lambda}}
+
+__device__ void dev_fun(int *out) {
+  int &ref_host_var = global_host_var; // dev-error {{reference to __host__ variable 'global_host_var' in __device__ function}}
+  int &ref_dev_var = global_dev_var;
+  int &ref_constant_var = global_constant_var;
+  int &ref_shared_var = global_shared_var;
+  const int &ref_constexpr_var = global_constexpr_var;
+  const int &ref_const_var = global_const_var;
+
+  *out = global_host_var; // dev-error {{reference to __host__ variable 'global_host_var' in __device__ function}}
+  *out = global_dev_var;
+  *out = global_constant_var;
+  *out = global_shared_var;
+  *out = global_constexpr_var;
+  *out = global_const_var;
+
+  *out = ref_host_var;
+  *out = ref_dev_var;
+  *out = ref_constant_var;
+  *out = ref_shared_var;
+  *out = ref_constexpr_var;
+  *out = ref_const_var;
+}
+
+__global__ void global_fun(int *out) {
+  int &ref_host_var = global_host_var; // dev-error {{reference to __host__ variable 'global_host_var' in __global__ function}}
+  int &ref_dev_var = global_dev_var;
+  int &ref_constant_var = global_constant_var;
+  int &ref_shared_var = global_shared_var;
+  const int &ref_constexpr_var = global_constexpr_var;
+  const int &ref_const_var = global_const_var;
+
+  *out = global_host_var; // dev-error {{reference to __host__ variable 'global_host_var' in __global__ function}}
+  *out = global_dev_var;
+  *out = global_constant_var;
+  *out = global_shared_var;
+  *out = global_constexpr_var;
+  *out = global_const_var;
+
+  *out = ref_host_var;
+  *out = ref_dev_var;
+  *out = ref_constant_var;
+  *out = ref_shared_var;
+  *out = ref_constexpr_var;
+  *out = ref_const_var;
+}
+
+__host__ __device__ void host_dev_fun(int *out) {
+  int &ref_host_var = global_host_var; // dev-error {{reference to __host__ variable 'global_host_var' in __host__ __device__ function}}
+  int &ref_dev_var = global_dev_var;
+  int &ref_constant_var = global_constant_var;
+  int &ref_shared_var = global_shared_var;
+  const int &ref_constexpr_var = global_constexpr_var;
+  const int &ref_const_var = global_const_var;
+
+  *out = global_host_var; // dev-error {{reference to __host__ variable 'global_host_var' in __host__ __device__ function}}
+  *out = global_dev_var;
+  *out = global_constant_var;
+  *out = global_shared_var;
+  *out = global_constexpr_var;
+  *out = global_const_var;
+
+  *out = ref_host_var;
+  *out = ref_dev_var;
+  *out = ref_constant_var;
+  *out = ref_shared_var;
+  *out = ref_constexpr_var;
+  *out = ref_const_var;
+}
+
+inline __host__ __device__ void inline_host_dev_fun(int *out) {
+  int &ref_host_var = global_host_var;
+  int &ref_dev_var = global_dev_var;
+  int &ref_constant_var = global_constant_var;
+  int &ref_shared_var = global_shared_var;
+  const int &ref_constexpr_var = global_constexpr_var;
+  const int &ref_const_var = global_const_var;
+
+  *out = global_host_var;
+  *out = global_dev_var;
+  *out = global_constant_var;
+  *out = global_shared_var;
+  *out = global_constexpr_var;
+  *out = global_const_var;
+
+  *out = ref_host_var;
+  *out = ref_dev_var;
+  *out = ref_constant_var;
+  *out = ref_shared_var;
+  *out = ref_constexpr_var;
+  *out = ref_const_var;
+}
+
+void dev_lambda_capture_by_ref(int *out) {
+  int &ref_host_var = global_host_var;
+  kernel<<<1,1>>>([&]() {
+  int &ref_dev_var = global_dev_var;
+  int &ref_constant_var = global_constant_var;
+  int &ref_shared_var = global_shared_var;
+  const int &ref_constexpr_var = global_constexpr_var;
+  const int &ref_const_var = global_const_var;
+
+  *out = global_host_var; // dev-error {{reference to __host__ variable 'global_host_var' in __host__ __device__ function}}
+                          // dev-error@-1 {{capture host variable 'out' by reference in device or host device lambda function}}
+  *out = global_dev_var;
+  *out = global_constant_var;
+  *out = global_shared_var;
+  *out = global_constexpr_var;
+  *out = global_const_var;
+
+  *out = ref_host_var; // dev-error {{capture host variable 'ref_host_var' by reference in device or host device lambda function}}
+  *out = ref_dev_var;
+  *out = ref_constant_var;
+  *out = ref_shared_var;
+  *out = ref_constexpr_var;
+  *out = ref_const_var;
+  });
+}
+
+void dev_lambda_capture_by_copy(int *out) {
+  int &ref_host_var = global_host_var;
+  kernel<<<1,1>>>([=]() {
+  int &ref_dev_var = global_dev_var;
+  int &ref_constant_var = global_constant_var;
+  int &ref_shared_var = global_shared_var;
+  const int &ref_constexpr_var = global_constexpr_var;
+  const int &ref_const_var = global_const_var;
+
+  *out = global_host_var; // dev-error {{reference to __host__ variable 'global_host_var' in __host__ __device__ function}}
+  *out = global_dev_var;
+  *out = global_constant_var;
+  *out = global_shared_var;
+  *out = global_constexpr_var;
+  *out = global_const_var;
+
+  *out = ref_host_var;
+  *out = ref_dev_var;
+  *out = ref_constant_var;
+  *out = ref_shared_var;
+  *out = ref_constexpr_var;
+  *out = ref_const_var;
+  });
+}
+
Index: clang/test/CodeGenCUDA/lambda-reference-var.cu
===================================================================
--- /dev/null
+++ clang/test/CodeGenCUDA/lambda-reference-var.cu
@@ -0,0 +1,126 @@
+// RUN: %clang_cc1 -x hip -emit-llvm -std=c++11 %s -o - \
+// RUN:   -triple x86_64-linux-gnu \
+// RUN:   | FileCheck -check-prefix=HOST %s
+// RUN: %clang_cc1 -x hip -emit-llvm -std=c++11 %s -o - \
+// RUN:   -triple amdgcn-amd-amdhsa -fcuda-is-device \
+// RUN:   | FileCheck -check-prefix=DEV %s
+
+#include "Inputs/cuda.h"
+
+// HOST: %[[T1:.*]] = type <{ i32*, i32, [4 x i8] }>
+// HOST: %[[T2:.*]] = type { i32*, i32** }
+// HOST: %[[T3:.*]] = type <{ i32*, i32, [4 x i8] }>
+// DEV: %[[T1:.*]] = type { i32* }
+// DEV: %[[T2:.*]] = type { i32** }
+// DEV: %[[T3:.*]] = type <{ i32*, i32, [4 x i8] }>
+int global_host_var;
+__device__ int global_device_var;
+
+template<class F>
+__global__ void kern(F f) { f(); }
+
+// DEV-LABEL: @_ZZ27dev_capture_dev_ref_by_copyPiENKUlvE_clEv(
+// DEV: %[[VAL:.*]] = load i32, i32* addrspacecast (i32 addrspace(1)* @global_device_var to i32*)
+// DEV: store i32 %[[VAL]]
+__device__ void dev_capture_dev_ref_by_copy(int *out) {
+  int &ref = global_device_var;
+  [=](){ *out = ref;}();
+}
+
+// DEV-LABEL: @_ZZ26dev_capture_dev_ref_by_refPiENKUlvE_clEv(
+// DEV: %[[VAL:.*]] = load i32, i32* addrspacecast (i32 addrspace(1)* @global_device_var to i32*)
+// DEV: %[[VAL2:.*]] = add nsw i32 %[[VAL]], 1
+// DEV: store i32 %[[VAL2]], i32* addrspacecast (i32 addrspace(1)* @global_device_var to i32*)
+// DEV: %[[VAL:.*]] = load i32, i32* addrspacecast (i32 addrspace(1)* @global_device_var to i32*)
+// DEV: store i32 %[[VAL]]
+__device__ void dev_capture_dev_ref_by_ref(int *out) {
+  int &ref = global_device_var;
+  [&](){ ref++; *out = ref;}();
+}
+
+// DEV-LABEL: define void @_Z7dev_refPi(
+// DEV: %[[VAL:.*]] = load i32, i32* addrspacecast (i32 addrspace(1)* @global_device_var to i32*)
+// DEV: %[[VAL2:.*]] = add nsw i32 %[[VAL]], 1
+// DEV: store i32 %[[VAL2]], i32* addrspacecast (i32 addrspace(1)* @global_device_var to i32*)
+// DEV: %[[VAL:.*]] = load i32, i32* addrspacecast (i32 addrspace(1)* @global_device_var to i32*)
+// DEV: store i32 %[[VAL]]
+__device__ void dev_ref(int *out) {
+  int &ref = global_device_var;
+  ref++;
+  *out = ref;
+}
+
+// DEV-LABEL: @_ZZ14dev_lambda_refPiENKUlvE_clEv(
+// DEV: %[[VAL:.*]] = load i32, i32* addrspacecast (i32 addrspace(1)* @global_device_var to i32*)
+// DEV: %[[VAL2:.*]] = add nsw i32 %[[VAL]], 1
+// DEV: store i32 %[[VAL2]], i32* addrspacecast (i32 addrspace(1)* @global_device_var to i32*)
+// DEV: %[[VAL:.*]] = load i32, i32* addrspacecast (i32 addrspace(1)* @global_device_var to i32*)
+// DEV: store i32 %[[VAL]]
+__device__ void dev_lambda_ref(int *out) {
+  [=](){
+    int &ref = global_device_var;
+    ref++;
+    *out = ref;
+  }();
+}
+
+// HOST-LABEL: @_ZZ29host_capture_host_ref_by_copyPiENKUlvE_clEv(
+// HOST: %[[VAL:.*]] = load i32, i32* @global_host_var
+// HOST: store i32 %[[VAL]]
+void host_capture_host_ref_by_copy(int *out) {
+  int &ref = global_host_var;
+  [=](){ *out = ref;}();
+}
+
+// HOST-LABEL: @_ZZ28host_capture_host_ref_by_refPiENKUlvE_clEv(
+// HOST: %[[CAP:.*]] = getelementptr inbounds %[[T2]], %[[T2]]* %this1, i32 0, i32 0
+// HOST: %[[REF:.*]] = load i32*, i32** %[[CAP]]
+// HOST: %[[VAL:.*]] = load i32, i32* %[[REF]]
+// HOST: %[[VAL2:.*]] = add nsw i32 %[[VAL]], 1
+// HOST: store i32 %[[VAL2]], i32* %[[REF]]
+// HOST: %[[VAL:.*]] = load i32, i32* @global_host_var
+// HOST: store i32 %[[VAL]]
+void host_capture_host_ref_by_ref(int *out) {
+  int &ref = global_host_var;
+  [&](){ ref++; *out = ref;}();
+}
+
+// HOST-LABEL: define void @_Z8host_refPi(
+// HOST: %[[VAL:.*]] = load i32, i32* @global_host_var
+// HOST: %[[VAL2:.*]] = add nsw i32 %[[VAL]], 1
+// HOST: store i32 %[[VAL2]], i32* @global_host_var
+// HOST: %[[VAL:.*]] = load i32, i32* @global_host_var
+// HOST: store i32 %[[VAL]]
+void host_ref(int *out) {
+  int &ref = global_host_var;
+  ref++;
+  *out = ref;
+}
+
+// HOST-LABEL: @_ZZ15host_lambda_refPiENKUlvE_clEv(
+// HOST: %[[VAL:.*]] = load i32, i32* @global_host_var
+// HOST: %[[VAL2:.*]] = add nsw i32 %[[VAL]], 1
+// HOST: store i32 %[[VAL2]], i32* @global_host_var
+// HOST: %[[VAL:.*]] = load i32, i32* @global_host_var
+// HOST: store i32 %[[VAL]]
+void host_lambda_ref(int *out) {
+  [=](){
+    int &ref = global_host_var;
+    ref++;
+    *out = ref;
+  }();
+}
+
+// HOST-LABEL: define void @_Z28dev_capture_host_ref_by_copyPi(
+// HOST: %[[CAP:.*]] = getelementptr inbounds %[[T3]], %[[T3]]* %{{.*}}, i32 0, i32 1
+// HOST: %[[VAL:.*]] = load i32, i32* @global_host_var
+// HOST: store i32 %[[VAL]], i32* %[[CAP]]
+// DEV-LABEL: define internal void @_ZZ28dev_capture_host_ref_by_copyPiENKUlvE_clEv(
+// DEV: %[[CAP:.*]] = getelementptr inbounds %[[T3]], %[[T3]]* %this1, i32 0, i32 1
+// DEV: %[[VAL:.*]] = load i32, i32* %[[CAP]]
+// DEV: store i32 %[[VAL]]
+void dev_capture_host_ref_by_copy(int *out) {
+  int &ref = global_host_var;
+  kern<<<1, 1>>>([=]__device__() { *out = ref;});
+}
+
Index: clang/test/CodeGenCUDA/function-overload.cu
===================================================================
--- clang/test/CodeGenCUDA/function-overload.cu
+++ clang/test/CodeGenCUDA/function-overload.cu
@@ -12,6 +12,9 @@
 #include "Inputs/cuda.h"
 
 // Check constructors/destructors for D/H functions
+#ifdef __CUDA_ARCH__
+__device__
+#endif
 int x;
 struct s_cd_dh {
   __host__ s_cd_dh() { x = 11; }
Index: clang/lib/Sema/SemaExpr.cpp
===================================================================
--- clang/lib/Sema/SemaExpr.cpp
+++ clang/lib/Sema/SemaExpr.cpp
@@ -354,6 +354,21 @@
 
   diagnoseUseOfInternalDeclInInlineFunction(*this, D, Loc);
 
+  if (LangOpts.CUDAIsDevice) {
+    auto *FD = dyn_cast_or_null<FunctionDecl>(CurContext);
+    auto Target = IdentifyCUDATarget(FD);
+    if (FD && Target != CFT_Host) {
+      const auto *VD = dyn_cast<VarDecl>(D);
+      if (VD && VD->hasGlobalStorage() && !VD->hasAttr<CUDADeviceAttr>() &&
+          !VD->hasAttr<CUDAConstantAttr>() && !VD->hasAttr<CUDASharedAttr>() &&
+          !VD->getType()->isCUDADeviceBuiltinSurfaceType() &&
+          !VD->getType()->isCUDADeviceBuiltinTextureType() &&
+          !VD->isConstexpr() && !VD->getType().isConstQualified())
+        targetDiag(*Locs.begin(), diag::err_ref_bad_target)
+            << /*host*/ 2 << /*variable*/ 1 << VD << Target;
+    }
+  }
+
   if (LangOpts.SYCLIsDevice || (LangOpts.OpenMP && LangOpts.OpenMPIsDevice)) {
     if (const auto *VD = dyn_cast<ValueDecl>(D))
       checkDeviceDecl(VD, Loc);
@@ -1939,13 +1954,47 @@
   if (isUnevaluatedContext())
     return NOUR_Unevaluated;
 
+  // CUDA/HIP: Check whether a captured reference variable is referencing a
+  // host variable in a device or host device lambda.
+  auto IsCapturingReferenceToHostVarInCUDADeviceLambda = [&](VarDecl *VD) {
+    if (!getLangOpts().CUDA || !VD->hasInit())
+      return false;
+    assert(VD->getType()->isReferenceType());
+
+    // Check whether the reference variable is referencing a host variable.
+    auto *DRE = dyn_cast<DeclRefExpr>(VD->getInit());
+    if (!DRE)
+      return false;
+    auto *Referee = dyn_cast<VarDecl>(DRE->getDecl());
+    if (!Referee || !Referee->hasGlobalStorage() ||
+        Referee->hasAttr<CUDADeviceAttr>())
+      return false;
+
+    // Check whether the current function is a device or host device lambda.
+    // Check whether the reference variable is a capture by getDeclContext()
+    // since refersToEnclosingVariableOrCapture() is not ready at this point.
+    auto *MD = dyn_cast_or_null<CXXMethodDecl>(CurContext);
+    if (MD && MD->getParent()->isLambda() &&
+        MD->getOverloadedOperator() == OO_Call &&
+        MD->hasAttr<CUDADeviceAttr>() && VD->getDeclContext() != MD)
+      return true;
+
+    return false;
+  };
   // C++2a [basic.def.odr]p4:
   //   A variable x whose name appears as a potentially-evaluated expression e
   //   is odr-used by e unless [...] x is a reference that is usable in
   //   constant expressions.
+  // CUDA/HIP:
+  //   If a reference variable referencing a host variable is captured in a
+  //   device or host device lambda, the value of the referee must be copied
+  //   to the capture and the reference variable must be treated as odr-use
+  //   since the value of the referee is not known at compile time and must
+  //   be loaded from the captured.
   if (VarDecl *VD = dyn_cast<VarDecl>(D)) {
     if (VD->getType()->isReferenceType() &&
         !(getLangOpts().OpenMP && isOpenMPCapturedDecl(D)) &&
+        !IsCapturingReferenceToHostVarInCUDADeviceLambda(VD) &&
         VD->isUsableInConstantExpressions(Context))
       return NOUR_Constant;
   }
Index: clang/lib/Sema/SemaCUDA.cpp
===================================================================
--- clang/lib/Sema/SemaCUDA.cpp
+++ clang/lib/Sema/SemaCUDA.cpp
@@ -743,7 +743,8 @@
     return true;
 
   SemaDiagnosticBuilder(DiagKind, Loc, diag::err_ref_bad_target, Caller, *this)
-      << IdentifyCUDATarget(Callee) << Callee << IdentifyCUDATarget(Caller);
+      << IdentifyCUDATarget(Callee) << /*function*/ 0 << Callee
+      << IdentifyCUDATarget(Caller);
   if (!Callee->getBuiltinID())
     SemaDiagnosticBuilder(DiagKind, Callee->getLocation(),
                           diag::note_previous_decl, Caller, *this)
Index: clang/lib/CodeGen/CGExpr.cpp
===================================================================
--- clang/lib/CodeGen/CGExpr.cpp
+++ clang/lib/CodeGen/CGExpr.cpp
@@ -1522,6 +1522,29 @@
   if (result.HasSideEffects)
     return ConstantEmission();
 
+  // In CUDA/HIP device compilation, a lambda may capture a reference variable
+  // referencing a global host variable by copy. In this case the lambda should
+  // make a copy of the value of the global host variable. The DRE of the
+  // captured reference variable cannot be emitted as load from the host
+  // global variable as compile time constant, since the host variable is not
+  // accessible on device. The DRE of the captured reference variable has to be
+  // loaded from captures.
+  if (CGM.getLangOpts().CUDAIsDevice &&
+      refExpr->refersToEnclosingVariableOrCapture()) {
+    auto *MD = dyn_cast_or_null<CXXMethodDecl>(CurCodeDecl);
+    if (MD && MD->getParent()->isLambda() &&
+        MD->getOverloadedOperator() == OO_Call) {
+      const APValue::LValueBase &base = result.Val.getLValueBase();
+      if (const ValueDecl *D = base.dyn_cast<const ValueDecl *>()) {
+        if (const VarDecl *VD = dyn_cast<const VarDecl>(D)) {
+          if (!VD->hasAttr<CUDADeviceAttr>()) {
+            return ConstantEmission();
+          }
+        }
+      }
+    }
+  }
+
   // Emit as a constant.
   auto C = ConstantEmitter(*this).emitAbstract(refExpr->getLocation(),
                                                result.Val, resultType);
Index: clang/include/clang/Basic/DiagnosticSemaKinds.td
===================================================================
--- clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -8146,7 +8146,7 @@
   "call to global function %0 not configured">;
 def err_ref_bad_target : Error<
   "reference to %select{__device__|__global__|__host__|__host__ __device__}0 "
-  "function %1 in %select{__device__|__global__|__host__|__host__ __device__}2 function">;
+  "%select{function|variable}1 %2 in %select{__device__|__global__|__host__|__host__ __device__}3 function">;
 def err_ref_bad_target_global_initializer : Error<
   "reference to %select{__device__|__global__|__host__|__host__ __device__}0 "
   "function %1 in global initializer">;
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to