yaxunl updated this revision to Diff 205503.
yaxunl added a comment.

Fix visibility and dso_local. Allow undefined symbol in code object. This is to 
allow merging the host and device symbols at run time.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D62738/new/

https://reviews.llvm.org/D62738

Files:
  include/clang/Basic/Attr.td
  include/clang/Basic/AttrDocs.td
  lib/CodeGen/CodeGenModule.cpp
  lib/CodeGen/TargetInfo.cpp
  lib/Driver/ToolChains/HIP.cpp
  lib/Sema/SemaDeclAttr.cpp
  test/AST/ast-dump-cuda-device-shadow.cu
  test/CodeGenCUDA/device-shadow.cu
  test/Driver/hip-toolchain-no-rdc.hip
  test/Driver/hip-toolchain-rdc.hip
  test/Misc/pragma-attribute-supported-attributes-list.test

Index: test/Misc/pragma-attribute-supported-attributes-list.test
===================================================================
--- test/Misc/pragma-attribute-supported-attributes-list.test
+++ test/Misc/pragma-attribute-supported-attributes-list.test
@@ -26,6 +26,7 @@
 // CHECK-NEXT: CPUSpecific (SubjectMatchRule_function)
 // CHECK-NEXT: CUDAConstant (SubjectMatchRule_variable)
 // CHECK-NEXT: CUDADevice (SubjectMatchRule_function, SubjectMatchRule_variable)
+// CHECK-NEXT: CUDADeviceShadow (SubjectMatchRule_variable)
 // CHECK-NEXT: CUDAGlobal (SubjectMatchRule_function)
 // CHECK-NEXT: CUDAHost (SubjectMatchRule_function)
 // CHECK-NEXT: CUDALaunchBounds (SubjectMatchRule_objc_method, SubjectMatchRule_hasType_functionType)
Index: test/Driver/hip-toolchain-rdc.hip
===================================================================
--- test/Driver/hip-toolchain-rdc.hip
+++ test/Driver/hip-toolchain-rdc.hip
@@ -43,7 +43,7 @@
 // CHECK-SAME: "-filetype=obj"
 // CHECK-SAME: "-mcpu=gfx803" "-o" [[OBJ_DEV1:".*-gfx803-.*o"]]
 
-// CHECK: [[LLD: ".*lld"]] "-flavor" "gnu" "--no-undefined" "-shared"
+// CHECK: [[LLD: ".*lld"]] "-flavor" "gnu" "-shared"
 // CHECK-SAME: "-o" "[[IMG_DEV1:.*out]]" [[OBJ_DEV1]]
 
 // CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" 
@@ -75,7 +75,7 @@
 // CHECK-SAME: "-filetype=obj"
 // CHECK-SAME: "-mcpu=gfx900" "-o" [[OBJ_DEV2:".*-gfx900-.*o"]]
 
-// CHECK: [[LLD]] "-flavor" "gnu" "--no-undefined" "-shared"
+// CHECK: [[LLD]] "-flavor" "gnu" "-shared"
 // CHECK-SAME: "-o" "[[IMG_DEV2:.*out]]" [[OBJ_DEV2]]
 
 // CHECK: [[CLANG]] "-cc1" "-triple" "x86_64-unknown-linux-gnu"
Index: test/Driver/hip-toolchain-no-rdc.hip
===================================================================
--- test/Driver/hip-toolchain-no-rdc.hip
+++ test/Driver/hip-toolchain-no-rdc.hip
@@ -37,7 +37,7 @@
 // CHECK-SAME: "-filetype=obj"
 // CHECK-SAME: "-mcpu=gfx803" "-o" [[OBJ_DEV_A_803:".*-gfx803-.*o"]]
 
-// CHECK: [[LLD: ".*lld"]] "-flavor" "gnu" "--no-undefined" "-shared"
+// CHECK: [[LLD: ".*lld"]] "-flavor" "gnu" "-shared"
 // CHECK-SAME: "-o" "[[IMG_DEV_A_803:.*out]]" [[OBJ_DEV_A_803]]
 
 //
@@ -65,7 +65,7 @@
 // CHECK-SAME: "-filetype=obj"
 // CHECK-SAME: "-mcpu=gfx900" "-o" [[OBJ_DEV_A_900:".*-gfx900-.*o"]]
 
-// CHECK: [[LLD: ".*lld"]] "-flavor" "gnu" "--no-undefined" "-shared"
+// CHECK: [[LLD: ".*lld"]] "-flavor" "gnu" "-shared"
 // CHECK-SAME: "-o" "[[IMG_DEV_A_900:.*out]]" [[OBJ_DEV_A_900]]
 
 //
@@ -109,7 +109,7 @@
 // CHECK-SAME: "-filetype=obj"
 // CHECK-SAME: "-mcpu=gfx803" "-o" [[OBJ_DEV_B_803:".*-gfx803-.*o"]]
 
-// CHECK: [[LLD: ".*lld"]] "-flavor" "gnu" "--no-undefined" "-shared"
+// CHECK: [[LLD: ".*lld"]] "-flavor" "gnu" "-shared"
 // CHECK-SAME: "-o" "[[IMG_DEV_B_803:.*out]]" [[OBJ_DEV_B_803]]
 
 //
@@ -137,7 +137,7 @@
 // CHECK-SAME: "-filetype=obj"
 // CHECK-SAME: "-mcpu=gfx900" "-o" [[OBJ_DEV_B_900:".*-gfx900-.*o"]]
 
-// CHECK: [[LLD: ".*lld"]] "-flavor" "gnu" "--no-undefined" "-shared"
+// CHECK: [[LLD: ".*lld"]] "-flavor" "gnu" "-shared"
 // CHECK-SAME: "-o" "[[IMG_DEV_B_900:.*out]]" [[OBJ_DEV_B_900]]
 
 //
Index: test/CodeGenCUDA/device-shadow.cu
===================================================================
--- /dev/null
+++ test/CodeGenCUDA/device-shadow.cu
@@ -0,0 +1,28 @@
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -std=c++11 -fcuda-is-device \
+// RUN:     -emit-llvm -o - %s | FileCheck -check-prefixes=CUDADEV %s
+// RUN: %clang_cc1 -triple x86_64 -std=c++11 \
+// RUN:     -emit-llvm -o - %s | FileCheck -check-prefixes=CUDAHOST %s
+
+// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -std=c++11 -fvisibility hidden -fapply-global-visibility-to-externs \
+// RUN:     -emit-llvm -o - -x hip %s | FileCheck -check-prefixes=HIPDEV %s
+// RUN: %clang_cc1 -triple x86_64 -std=c++11 \
+// RUN:     -emit-llvm -o - -x hip %s | FileCheck -check-prefixes=HIPHOST %s
+
+struct textureReference {
+  int a;
+};
+
+template <class T, int texType, int hipTextureReadMode>
+struct texture : public textureReference {
+texture() { a = 1; }
+};
+
+__attribute__((device_shadow)) texture<float, 2, 1> tex;
+// CUDADEV-NOT: @tex
+// CUDAHOST-NOT: call i32 @__hipRegisterVar{{.*}}@tex
+// HIPDEV: @tex = external addrspace(1) global %struct.texture
+// HIPDEV-NOT: declare{{.*}}void @_ZN7textureIfLi2ELi1EEC1Ev
+// HIPHOST:  define{{.*}}@_ZN7textureIfLi2ELi1EEC1Ev
+// HIPHOST:  call i32 @__hipRegisterVar{{.*}}@tex{{.*}}i32 0, i32 4, i32 0, i32 0)
Index: test/AST/ast-dump-cuda-device-shadow.cu
===================================================================
--- /dev/null
+++ test/AST/ast-dump-cuda-device-shadow.cu
@@ -0,0 +1,13 @@
+// RUN: %clang_cc1 -fcuda-is-device -ast-dump -ast-dump-filter tex %s | FileCheck -strict-whitespace %s
+// RUN: %clang_cc1 -ast-dump -ast-dump-filter tex %s | FileCheck -strict-whitespace %s
+struct textureReference {
+  int a;
+};
+
+// CHECK: CUDADeviceShadowAttr
+template <class T, int texType, int hipTextureReadMode>
+struct texture : public textureReference {
+texture() { a = 1; }
+};
+
+__attribute__((device_shadow)) texture<float, 1, 1> tex;
Index: lib/Sema/SemaDeclAttr.cpp
===================================================================
--- lib/Sema/SemaDeclAttr.cpp
+++ lib/Sema/SemaDeclAttr.cpp
@@ -6786,6 +6786,10 @@
   case ParsedAttr::AT_CUDAHost:
     handleSimpleAttributeWithExclusions<CUDAHostAttr, CUDAGlobalAttr>(S, D, AL);
     break;
+  case ParsedAttr::AT_CUDADeviceShadow:
+    handleSimpleAttributeWithExclusions<CUDADeviceShadowAttr, CUDADeviceAttr>(
+        S, D, AL);
+    break;
   case ParsedAttr::AT_GNUInline:
     handleGNUInlineAttr(S, D, AL);
     break;
Index: lib/Driver/ToolChains/HIP.cpp
===================================================================
--- lib/Driver/ToolChains/HIP.cpp
+++ lib/Driver/ToolChains/HIP.cpp
@@ -174,9 +174,8 @@
                                           const char *InputFileName) const {
   // Construct lld command.
   // The output from ld.lld is an HSA code object file.
-  ArgStringList LldArgs{"-flavor",    "gnu", "--no-undefined",
-                        "-shared",    "-o",  Output.getFilename(),
-                        InputFileName};
+  ArgStringList LldArgs{
+      "-flavor", "gnu", "-shared", "-o", Output.getFilename(), InputFileName};
   SmallString<128> LldPath(C.getDriver().Dir);
   llvm::sys::path::append(LldPath, "lld");
   const char *Lld = Args.MakeArgString(LldPath);
Index: lib/CodeGen/TargetInfo.cpp
===================================================================
--- lib/CodeGen/TargetInfo.cpp
+++ lib/CodeGen/TargetInfo.cpp
@@ -7848,12 +7848,24 @@
   return D->hasAttr<OpenCLKernelAttr>() ||
          (isa<FunctionDecl>(D) && D->hasAttr<CUDAGlobalAttr>()) ||
          (isa<VarDecl>(D) &&
-          (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>()));
+          (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>() ||
+           D->hasAttr<CUDADeviceShadowAttr>()));
+}
+
+static bool requiresAMDGPUDefaultVisibility(const Decl *D,
+                                            llvm::GlobalValue *GV) {
+  if (GV->getVisibility() != llvm::GlobalValue::HiddenVisibility)
+    return false;
+
+  return isa<VarDecl>(D) && D->hasAttr<CUDADeviceShadowAttr>();
 }
 
 void AMDGPUTargetCodeGenInfo::setTargetAttributes(
     const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
-  if (requiresAMDGPUProtectedVisibility(D, GV)) {
+  if (requiresAMDGPUDefaultVisibility(D, GV)) {
+    GV->setVisibility(llvm::GlobalValue::DefaultVisibility);
+    GV->setDSOLocal(false);
+  } else if (requiresAMDGPUProtectedVisibility(D, GV)) {
     GV->setVisibility(llvm::GlobalValue::ProtectedVisibility);
     GV->setDSOLocal(true);
   }
Index: lib/CodeGen/CodeGenModule.cpp
===================================================================
--- lib/CodeGen/CodeGenModule.cpp
+++ lib/CodeGen/CodeGenModule.cpp
@@ -2414,7 +2414,8 @@
       if (!Global->hasAttr<CUDADeviceAttr>() &&
           !Global->hasAttr<CUDAGlobalAttr>() &&
           !Global->hasAttr<CUDAConstantAttr>() &&
-          !Global->hasAttr<CUDASharedAttr>())
+          !Global->hasAttr<CUDASharedAttr>() &&
+          !(LangOpts.HIP && Global->hasAttr<CUDADeviceShadowAttr>()))
         return;
     } else {
       // We need to emit host-side 'shadows' for all global
@@ -3769,7 +3770,12 @@
       !getLangOpts().CUDAIsDevice &&
       (D->hasAttr<CUDAConstantAttr>() || D->hasAttr<CUDADeviceAttr>() ||
        D->hasAttr<CUDASharedAttr>());
-  if (getLangOpts().CUDA && (IsCUDASharedVar || IsCUDAShadowVar))
+  // Device side shadow of initialized host-side global variables are also
+  // left undefined.
+  bool IsHIPDeviceShadowVar = getLangOpts().HIP && getLangOpts().CUDAIsDevice &&
+                              D->hasAttr<CUDADeviceShadowAttr>();
+  if (getLangOpts().CUDA &&
+      (IsCUDASharedVar || IsCUDAShadowVar || IsHIPDeviceShadowVar))
     Init = llvm::UndefValue::get(getTypes().ConvertType(ASTTy));
   else if (!InitExpr) {
     // This is a tentative definition; tentative definitions are
@@ -3880,7 +3886,8 @@
       // global variables become internal definitions. These have to
       // be internal in order to prevent name conflicts with global
       // host variables with the same name in a different TUs.
-      if (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>()) {
+      if (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>() ||
+          (D->hasAttr<CUDADeviceShadowAttr>() && LangOpts.HIP)) {
         Linkage = llvm::GlobalValue::InternalLinkage;
 
         // Shadow variables and their properties must be registered
@@ -3904,7 +3911,8 @@
     }
   }
 
-  GV->setInitializer(Init);
+  if (!IsHIPDeviceShadowVar)
+    GV->setInitializer(Init);
   if (emitter) emitter->finalize(GV);
 
   // If it is safe to mark the global 'constant', do so now.
Index: include/clang/Basic/AttrDocs.td
===================================================================
--- include/clang/Basic/AttrDocs.td
+++ include/clang/Basic/AttrDocs.td
@@ -4157,3 +4157,17 @@
 ``__attribute__((malloc))``.
 }];
 }
+
+def DeviceShadowDocs : Documentation {
+  let Category = DocCatType;
+  let Content = [{
+The GNU style attribute __attribute__((device_shadow)) or MSVC style attribute
+__declspec(device_shadow) can be added to the definition of a global variable
+to indicate it is a HIP device shadow variable. A device shadow variable can
+be accessed on both device side and host side. It has external linkage and is
+not initialized on device side. It has internal linkage and is initialized by
+the initializer on host side.
+
+It is ignored for CUDA and other languages.
+  }];
+}
\ No newline at end of file
Index: include/clang/Basic/Attr.td
===================================================================
--- include/clang/Basic/Attr.td
+++ include/clang/Basic/Attr.td
@@ -951,6 +951,13 @@
   let Documentation = [Undocumented];
 }
 
+def CUDADeviceShadow : InheritableAttr {
+  let Spellings = [GNU<"device_shadow">, Declspec<"__device_shadow__">];
+  let Subjects = SubjectList<[Var]>;
+  let LangOpts = [CUDA];
+  let Documentation = [DeviceShadowDocs];
+}
+
 def CUDADeviceBuiltin : IgnoredAttr {
   let Spellings = [GNU<"device_builtin">, Declspec<"__device_builtin__">];
   let LangOpts = [CUDA];
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to