yaxunl updated this revision to Diff 347958.
yaxunl added a comment.

do not promote or check dependent variables since their ctor/dtor/initializers 
are not determined yet


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D103108/new/

https://reviews.llvm.org/D103108

Files:
  clang/lib/Sema/SemaCUDA.cpp
  clang/lib/Sema/SemaDecl.cpp
  clang/test/CodeGenCUDA/device-use-host-var.cu
  clang/test/SemaCUDA/device-use-host-var.cu

Index: clang/test/SemaCUDA/device-use-host-var.cu
===================================================================
--- clang/test/SemaCUDA/device-use-host-var.cu
+++ clang/test/SemaCUDA/device-use-host-var.cu
@@ -5,6 +5,8 @@
 
 #include "Inputs/cuda.h"
 
+int func();
+
 struct A {
   int x;
   static int host_var;
@@ -16,6 +18,19 @@
   int host_var;
 }
 
+// struct with non-empty ctor.
+struct B1 {
+  int x;
+  B1() { x = 1; }
+};
+
+// struct with non-empty dtor.
+struct B2 {
+  int x;
+  B2() {}
+  ~B2() { x = 0; }
+};
+
 static int static_host_var;
 
 __device__ int global_dev_var;
@@ -34,6 +49,17 @@
 const A global_const_struct_var{1};
 constexpr A global_constexpr_struct_var{1};
 
+// Check const host var initialized with non-empty ctor is not allowed in
+// device function.
+const B1 b1;
+
+// Check const host var having non-empty dtor is not allowed in device function.
+const B2 b2;
+
+// Check const host var initialized by non-constant initializer is not allowed
+// in device function.
+const int b3 = func();
+
 template<typename F>
 __global__ void kernel(F f) { f(); } // dev-note2 {{called by 'kernel<(lambda}}
 
@@ -53,11 +79,14 @@
   *out = global_host_var; // dev-error {{reference to __host__ variable 'global_host_var' in __device__ function}}
   *out = global_const_var;
   *out = global_constexpr_var;
+  *out = b1.x; // dev-error {{reference to __host__ variable 'b1' in __device__ function}}
+  *out = b2.x; // dev-error {{reference to __host__ variable 'b2' in __device__ function}}
+  *out = b3; // dev-error {{reference to __host__ variable 'b3' in __device__ function}}
   global_host_var = 1; // dev-error {{reference to __host__ variable 'global_host_var' in __device__ function}}
 
   // Check reference of non-constexpr host variables are not allowed.
   int &ref_host_var = global_host_var; // dev-error {{reference to __host__ variable 'global_host_var' in __device__ function}}
-  const int &ref_const_var = global_const_var; // dev-error {{reference to __host__ variable 'global_const_var' in __device__ function}}
+  const int &ref_const_var = global_const_var;
   const int &ref_constexpr_var = global_constexpr_var;
   *out = ref_host_var;
   *out = ref_constexpr_var;
@@ -65,18 +94,18 @@
 
   // Check access member of non-constexpr struct type host variable is not allowed.
   *out = global_host_struct_var.x; // dev-error {{reference to __host__ variable 'global_host_struct_var' in __device__ function}}
-  *out = global_const_struct_var.x; // dev-error {{reference to __host__ variable 'global_const_struct_var' in __device__ function}}
+  *out = global_const_struct_var.x;
   *out = global_constexpr_struct_var.x;
   global_host_struct_var.x = 1; // dev-error {{reference to __host__ variable 'global_host_struct_var' in __device__ function}}
 
   // Check address taking of non-constexpr host variables is not allowed.
   int *p = &global_host_var; // dev-error {{reference to __host__ variable 'global_host_var' in __device__ function}}
-  const int *cp = &global_const_var; // dev-error {{reference to __host__ variable 'global_const_var' in __device__ function}}
+  const int *cp = &global_const_var;
   const int *cp2 = &global_constexpr_var;
 
   // Check access elements of non-constexpr host array is not allowed.
   *out = global_host_array[1]; // dev-error {{reference to __host__ variable 'global_host_array' in __device__ function}}
-  *out = global_const_array[1]; // dev-error {{reference to __host__ variable 'global_const_array' in __device__ function}}
+  *out = global_const_array[1];
   *out = global_constexpr_array[1];
 
   // Check ODR-use of host variables in namespace is not allowed.
@@ -103,7 +132,7 @@
   int &ref_constant_var = global_constant_var;
   int &ref_shared_var = global_shared_var;
   const int &ref_constexpr_var = global_constexpr_var;
-  const int &ref_const_var = global_const_var; // dev-error {{reference to __host__ variable 'global_const_var' in __global__ function}}
+  const int &ref_const_var = global_const_var;
 
   *out = global_host_var; // dev-error {{reference to __host__ variable 'global_host_var' in __global__ function}}
   *out = global_dev_var;
@@ -126,7 +155,7 @@
   int &ref_constant_var = global_constant_var;
   int &ref_shared_var = global_shared_var;
   const int &ref_constexpr_var = global_constexpr_var;
-  const int &ref_const_var = global_const_var; // dev-error {{reference to __host__ variable 'global_const_var' in __host__ __device__ function}}
+  const int &ref_const_var = global_const_var;
 
   *out = global_host_var; // dev-error {{reference to __host__ variable 'global_host_var' in __host__ __device__ function}}
   *out = global_dev_var;
@@ -173,7 +202,7 @@
   int &ref_constant_var = global_constant_var;
   int &ref_shared_var = global_shared_var;
   const int &ref_constexpr_var = global_constexpr_var;
-  const int &ref_const_var = global_const_var; // dev-error {{reference to __host__ variable 'global_const_var' in __host__ __device__ function}}
+  const int &ref_const_var = global_const_var;
 
   *out = global_host_var; // dev-error {{reference to __host__ variable 'global_host_var' in __host__ __device__ function}}
                           // dev-error@-1 {{capture host variable 'out' by reference in device or host device lambda function}}
@@ -199,7 +228,7 @@
   int &ref_constant_var = global_constant_var;
   int &ref_shared_var = global_shared_var;
   const int &ref_constexpr_var = global_constexpr_var;
-  const int &ref_const_var = global_const_var; // dev-error {{reference to __host__ variable 'global_const_var' in __host__ __device__ function}}
+  const int &ref_const_var = global_const_var;
 
   *out = global_host_var; // dev-error {{reference to __host__ variable 'global_host_var' in __host__ __device__ function}}
   *out = global_dev_var;
Index: clang/test/CodeGenCUDA/device-use-host-var.cu
===================================================================
--- clang/test/CodeGenCUDA/device-use-host-var.cu
+++ clang/test/CodeGenCUDA/device-use-host-var.cu
@@ -1,5 +1,7 @@
-// RUN: %clang_cc1 -std=c++11 -triple amdgcn-amd-amdhsa \
+// RUN: %clang_cc1 -std=c++14 -triple amdgcn-amd-amdhsa \
 // RUN:   -fcuda-is-device -emit-llvm -o - -x hip %s | FileCheck %s
+// RUN: %clang_cc1 -std=c++14 -triple amdgcn-amd-amdhsa \
+// RUN:   -fcuda-is-device -emit-llvm -o - -x hip %s | FileCheck -check-prefix=NEG %s
 
 #include "Inputs/cuda.h"
 
@@ -7,34 +9,84 @@
   int x;
 };
 
+// Check the situation of B<T> has empty ctor but B<int> has non-empty ctor.
+// Make sure const B<int> variables are not promoted to constant variables.
+template<typename T>
+struct B {
+  T x;
+  static const B<T> y;
+};
+
+template<>
+struct B<int> {
+  int x;
+  B() { x = 1; }
+  static const B<int> y;
+};
+
+template<typename T>
+const B<T> B<T>::y;
+
+const B<int> B<int>::y;
+
+template<typename T>
+T temp_fun(T x) {
+  return B<T>::y.x;
+}
+
 constexpr int constexpr_var = 1;
 constexpr A constexpr_struct{2};
 constexpr A constexpr_array[4] = {0, 0, 0, 3};
 constexpr char constexpr_str[] = "abcd";
 const int const_var = 4;
+const A const_struct{5};
+const A const_array[] = {0, 0, 0, 6};
+const char const_str[] = "xyz";
 
 // CHECK-DAG: @_ZL13constexpr_str.const = private unnamed_addr addrspace(4) constant [5 x i8] c"abcd\00"
 // CHECK-DAG: @_ZL13constexpr_var = internal addrspace(4) constant i32 1
 // CHECK-DAG: @_ZL16constexpr_struct = internal addrspace(4) constant %struct.A { i32 2 }
 // CHECK-DAG: @_ZL15constexpr_array = internal addrspace(4) constant [4 x %struct.A] [%struct.A zeroinitializer, %struct.A zeroinitializer, %struct.A zeroinitializer, %struct.A { i32 3 }]
-// CHECK-NOT: external
+// CHECK-DAG: @_ZL9const_var = internal addrspace(4) constant i32 4
+// CHECK-DAG: @_ZL12const_struct = internal addrspace(4) constant %struct.A { i32 5 }
+// CHECK-DAG: @_ZL11const_array = internal addrspace(4) constant [4 x %struct.A] [%struct.A zeroinitializer, %struct.A zeroinitializer, %struct.A zeroinitializer, %struct.A { i32 6 }]
+// CHECK-DAG: @_ZL9const_str = internal addrspace(4) constant [4 x i8] c"xyz\00"
+
+// NEG-NOT: @_ZN1BIiE1yE
+// NEG-NOT: external
 
 // CHECK-LABEL: define{{.*}}@_Z7dev_funPiPPKi
 // CHECK: store i32 1
 // CHECK: store i32 2
 // CHECK: store i32 3
-// CHECK: store i32 4
 // CHECK: load i8, i8* getelementptr {{.*}} @_ZL13constexpr_str.const
+// CHECK: store i32 4
+// CHECK: store i32 5
+// CHECK: store i32 6
+// CHECK: load i8, i8* getelementptr {{.*}} @_ZL9const_str
 // CHECK: store i32* {{.*}}@_ZL13constexpr_var
 // CHECK: store i32* getelementptr {{.*}} @_ZL16constexpr_struct
 // CHECK: store i32* getelementptr {{.*}} @_ZL15constexpr_array
+// CHECK: store i32* {{.*}}@_ZL9const_var
+// CHECK: store i32* getelementptr {{.*}} @_ZL12const_struct
+// CHECK: store i32* getelementptr {{.*}} @_ZL11const_array
 __device__ void dev_fun(int *out, const int **out2) {
   *out = constexpr_var;
   *out = constexpr_struct.x;
   *out = constexpr_array[3].x;
-  *out = const_var;
   *out = constexpr_str[3];
+  *out = const_var;
+  *out = const_struct.x;
+  *out = const_array[3].x;
+  *out = const_str[3];
   *out2 = &constexpr_var;
   *out2 = &constexpr_struct.x;
   *out2 = &constexpr_array[3].x;
+  *out2 = &const_var;
+  *out2 = &const_struct.x;
+  *out2 = &const_array[3].x;
+}
+
+void fun() {
+  temp_fun(1);
 }
Index: clang/lib/Sema/SemaDecl.cpp
===================================================================
--- clang/lib/Sema/SemaDecl.cpp
+++ clang/lib/Sema/SemaDecl.cpp
@@ -12957,6 +12957,8 @@
 void Sema::CheckCompleteVariableDeclaration(VarDecl *var) {
   if (var->isInvalidDecl()) return;
 
+  MaybeAddCUDAConstantAttr(var);
+
   if (getLangOpts().OpenCL) {
     // OpenCL v2.0 s6.12.5 - Every block variable declaration must have an
     // initialiser
Index: clang/lib/Sema/SemaCUDA.cpp
===================================================================
--- clang/lib/Sema/SemaCUDA.cpp
+++ clang/lib/Sema/SemaCUDA.cpp
@@ -147,6 +147,9 @@
     return CVT_Unified;
   if (Var->isConstexpr() && !hasExplicitAttr<CUDAConstantAttr>(Var))
     return CVT_Both;
+  if (Var->getType().isConstQualified() && Var->hasAttr<CUDAConstantAttr>() &&
+      !hasExplicitAttr<CUDAConstantAttr>(Var))
+    return CVT_Both;
   if (Var->hasAttr<CUDADeviceAttr>() || Var->hasAttr<CUDAConstantAttr>() ||
       Var->hasAttr<CUDASharedAttr>() ||
       Var->getType()->isCUDADeviceBuiltinSurfaceType() ||
@@ -549,47 +552,77 @@
   return true;
 }
 
+namespace {
+enum CUDAInitializerCheckKind {
+  CICK_DeviceOrConstant, // Check initializer for device/constant variable
+  CICK_Shared,           // Check initializer for shared variable
+};
+
+bool IsDependentVar(VarDecl *VD) {
+  if (VD->getType()->isDependentType())
+    return true;
+  if (const auto *Init = VD->getInit())
+    return Init->isValueDependent();
+  return false;
+}
+// Check whether a variable has an allowed initializer for a CUDA device side
+// variable with global storage. \p VD may be a host variable to be checked for
+// potential promotion to device side variable.
+//
+// CUDA/HIP allows only empty constructors as initializers for global
+// variables (see E.2.3.1, CUDA 7.5). The same restriction also applies to all
+// __shared__ variables whether they are local or not (they all are implicitly
+// static in CUDA). One exception is that CUDA allows constant initializers
+// for __constant__ and __device__ variables.
+bool HasAllowedCUDADeviceStaticInitializer(Sema &S, VarDecl *VD,
+                                           CUDAInitializerCheckKind CheckKind) {
+  assert(!VD->isInvalidDecl() && VD->hasGlobalStorage());
+  assert(!IsDependentVar(VD) && "do not check dependent var");
+  const Expr *Init = VD->getInit();
+  auto IsEmptyInit = [&](const Expr *Init) {
+    if (!Init)
+      return true;
+    if (const auto *CE = dyn_cast<CXXConstructExpr>(Init)) {
+      return S.isEmptyCudaConstructor(VD->getLocation(), CE->getConstructor());
+    }
+    return false;
+  };
+  auto IsConstantInit = [&](const Expr *Init) {
+    assert(Init);
+    return Init->isConstantInitializer(S.Context,
+                                       VD->getType()->isReferenceType());
+  };
+  auto HasEmptyDtor = [&](VarDecl *VD) {
+    if (const auto *RD = VD->getType()->getAsCXXRecordDecl())
+      return S.isEmptyCudaDestructor(VD->getLocation(), RD->getDestructor());
+    return true;
+  };
+  if (CheckKind == CICK_Shared)
+    return IsEmptyInit(Init) && HasEmptyDtor(VD);
+  return S.LangOpts.GPUAllowDeviceInit ||
+         ((IsEmptyInit(Init) || IsConstantInit(Init)) && HasEmptyDtor(VD));
+}
+} // namespace
+
 void Sema::checkAllowedCUDAInitializer(VarDecl *VD) {
-  if (VD->isInvalidDecl() || !VD->hasInit() || !VD->hasGlobalStorage())
+  // Do not check dependent variables since the ctor/dtor/initializer are not
+  // determined. Do it after instantiation.
+  if (VD->isInvalidDecl() || !VD->hasInit() || !VD->hasGlobalStorage() ||
+      IsDependentVar(VD))
     return;
   const Expr *Init = VD->getInit();
-  if (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>() ||
-      VD->hasAttr<CUDASharedAttr>()) {
-    if (LangOpts.GPUAllowDeviceInit)
+  bool IsSharedVar = VD->hasAttr<CUDASharedAttr>();
+  bool IsDeviceOrConstantVar =
+      !IsSharedVar &&
+      (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>());
+  if (IsDeviceOrConstantVar || IsSharedVar) {
+    if (HasAllowedCUDADeviceStaticInitializer(
+            *this, VD, IsSharedVar ? CICK_Shared : CICK_DeviceOrConstant))
       return;
-    bool AllowedInit = false;
-    if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Init))
-      AllowedInit =
-          isEmptyCudaConstructor(VD->getLocation(), CE->getConstructor());
-    // We'll allow constant initializers even if it's a non-empty
-    // constructor according to CUDA rules. This deviates from NVCC,
-    // but allows us to handle things like constexpr constructors.
-    if (!AllowedInit &&
-        (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>())) {
-      auto *Init = VD->getInit();
-      // isConstantInitializer cannot be called with dependent value, therefore
-      // we skip checking dependent value here. This is OK since
-      // checkAllowedCUDAInitializer is called again when the template is
-      // instantiated.
-      AllowedInit =
-          VD->getType()->isDependentType() || Init->isValueDependent() ||
-          Init->isConstantInitializer(Context,
-                                      VD->getType()->isReferenceType());
-    }
-
-    // Also make sure that destructor, if there is one, is empty.
-    if (AllowedInit)
-      if (CXXRecordDecl *RD = VD->getType()->getAsCXXRecordDecl())
-        AllowedInit =
-            isEmptyCudaDestructor(VD->getLocation(), RD->getDestructor());
-
-    if (!AllowedInit) {
-      Diag(VD->getLocation(), VD->hasAttr<CUDASharedAttr>()
-                                  ? diag::err_shared_var_init
-                                  : diag::err_dynamic_var_init)
-          << Init->getSourceRange();
-      VD->setInvalidDecl();
-    }
+    Diag(VD->getLocation(),
+         IsSharedVar ? diag::err_shared_var_init : diag::err_dynamic_var_init)
+        << Init->getSourceRange();
+    VD->setInvalidDecl();
   } else {
     // This is a host-side global variable.  Check that the initializer is
     // callable from the host side.
@@ -673,9 +706,15 @@
 }
 
 void Sema::MaybeAddCUDAConstantAttr(VarDecl *VD) {
-  if (getLangOpts().CUDAIsDevice && VD->isConstexpr() &&
+  // Do not promote dependent variables since the cotr/dtor/initializer are
+  // not determined. Do it after instantiation.
+  if (getLangOpts().CUDAIsDevice && !VD->hasAttr<CUDAConstantAttr>() &&
+      !VD->hasAttr<CUDAConstantAttr>() && !VD->hasAttr<CUDASharedAttr>() &&
       (VD->isFileVarDecl() || VD->isStaticDataMember()) &&
-      !VD->hasAttr<CUDAConstantAttr>()) {
+      !IsDependentVar(VD) &&
+      (VD->isConstexpr() || (VD->getType().isConstQualified() &&
+                             HasAllowedCUDADeviceStaticInitializer(
+                                 *this, VD, CICK_DeviceOrConstant)))) {
     VD->addAttr(CUDAConstantAttr::CreateImplicit(getASTContext()));
   }
 }
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to