yaxunl updated this revision to Diff 205503. yaxunl added a comment. Fix the visibility and dso_local settings of device shadow variables. Allow undefined symbols in the device code object, so that the host and device symbols can be merged at run time.
CHANGES SINCE LAST ACTION https://reviews.llvm.org/D62738/new/ https://reviews.llvm.org/D62738 Files: include/clang/Basic/Attr.td include/clang/Basic/AttrDocs.td lib/CodeGen/CodeGenModule.cpp lib/CodeGen/TargetInfo.cpp lib/Driver/ToolChains/HIP.cpp lib/Sema/SemaDeclAttr.cpp test/AST/ast-dump-cuda-device-shadow.cu test/CodeGenCUDA/device-shadow.cu test/Driver/hip-toolchain-no-rdc.hip test/Driver/hip-toolchain-rdc.hip test/Misc/pragma-attribute-supported-attributes-list.test
Index: test/Misc/pragma-attribute-supported-attributes-list.test =================================================================== --- test/Misc/pragma-attribute-supported-attributes-list.test +++ test/Misc/pragma-attribute-supported-attributes-list.test @@ -26,6 +26,7 @@ // CHECK-NEXT: CPUSpecific (SubjectMatchRule_function) // CHECK-NEXT: CUDAConstant (SubjectMatchRule_variable) // CHECK-NEXT: CUDADevice (SubjectMatchRule_function, SubjectMatchRule_variable) +// CHECK-NEXT: CUDADeviceShadow (SubjectMatchRule_variable) // CHECK-NEXT: CUDAGlobal (SubjectMatchRule_function) // CHECK-NEXT: CUDAHost (SubjectMatchRule_function) // CHECK-NEXT: CUDALaunchBounds (SubjectMatchRule_objc_method, SubjectMatchRule_hasType_functionType) Index: test/Driver/hip-toolchain-rdc.hip =================================================================== --- test/Driver/hip-toolchain-rdc.hip +++ test/Driver/hip-toolchain-rdc.hip @@ -43,7 +43,7 @@ // CHECK-SAME: "-filetype=obj" // CHECK-SAME: "-mcpu=gfx803" "-o" [[OBJ_DEV1:".*-gfx803-.*o"]] -// CHECK: [[LLD: ".*lld"]] "-flavor" "gnu" "--no-undefined" "-shared" +// CHECK: [[LLD: ".*lld"]] "-flavor" "gnu" "-shared" // CHECK-SAME: "-o" "[[IMG_DEV1:.*out]]" [[OBJ_DEV1]] // CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" @@ -75,7 +75,7 @@ // CHECK-SAME: "-filetype=obj" // CHECK-SAME: "-mcpu=gfx900" "-o" [[OBJ_DEV2:".*-gfx900-.*o"]] -// CHECK: [[LLD]] "-flavor" "gnu" "--no-undefined" "-shared" +// CHECK: [[LLD]] "-flavor" "gnu" "-shared" // CHECK-SAME: "-o" "[[IMG_DEV2:.*out]]" [[OBJ_DEV2]] // CHECK: [[CLANG]] "-cc1" "-triple" "x86_64-unknown-linux-gnu" Index: test/Driver/hip-toolchain-no-rdc.hip =================================================================== --- test/Driver/hip-toolchain-no-rdc.hip +++ test/Driver/hip-toolchain-no-rdc.hip @@ -37,7 +37,7 @@ // CHECK-SAME: "-filetype=obj" // CHECK-SAME: "-mcpu=gfx803" "-o" [[OBJ_DEV_A_803:".*-gfx803-.*o"]] -// CHECK: [[LLD: ".*lld"]] "-flavor" "gnu" "--no-undefined" "-shared" +// 
CHECK: [[LLD: ".*lld"]] "-flavor" "gnu" "-shared" // CHECK-SAME: "-o" "[[IMG_DEV_A_803:.*out]]" [[OBJ_DEV_A_803]] // @@ -65,7 +65,7 @@ // CHECK-SAME: "-filetype=obj" // CHECK-SAME: "-mcpu=gfx900" "-o" [[OBJ_DEV_A_900:".*-gfx900-.*o"]] -// CHECK: [[LLD: ".*lld"]] "-flavor" "gnu" "--no-undefined" "-shared" +// CHECK: [[LLD: ".*lld"]] "-flavor" "gnu" "-shared" // CHECK-SAME: "-o" "[[IMG_DEV_A_900:.*out]]" [[OBJ_DEV_A_900]] // @@ -109,7 +109,7 @@ // CHECK-SAME: "-filetype=obj" // CHECK-SAME: "-mcpu=gfx803" "-o" [[OBJ_DEV_B_803:".*-gfx803-.*o"]] -// CHECK: [[LLD: ".*lld"]] "-flavor" "gnu" "--no-undefined" "-shared" +// CHECK: [[LLD: ".*lld"]] "-flavor" "gnu" "-shared" // CHECK-SAME: "-o" "[[IMG_DEV_B_803:.*out]]" [[OBJ_DEV_B_803]] // @@ -137,7 +137,7 @@ // CHECK-SAME: "-filetype=obj" // CHECK-SAME: "-mcpu=gfx900" "-o" [[OBJ_DEV_B_900:".*-gfx900-.*o"]] -// CHECK: [[LLD: ".*lld"]] "-flavor" "gnu" "--no-undefined" "-shared" +// CHECK: [[LLD: ".*lld"]] "-flavor" "gnu" "-shared" // CHECK-SAME: "-o" "[[IMG_DEV_B_900:.*out]]" [[OBJ_DEV_B_900]] // Index: test/CodeGenCUDA/device-shadow.cu =================================================================== --- /dev/null +++ test/CodeGenCUDA/device-shadow.cu @@ -0,0 +1,28 @@ +// REQUIRES: amdgpu-registered-target + +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -std=c++11 -fcuda-is-device \ +// RUN: -emit-llvm -o - %s | FileCheck -check-prefixes=CUDADEV %s +// RUN: %clang_cc1 -triple x86_64 -std=c++11 \ +// RUN: -emit-llvm -o - %s | FileCheck -check-prefixes=CUDAHOST %s + +// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -std=c++11 -fvisibility hidden -fapply-global-visibility-to-externs \ +// RUN: -emit-llvm -o - -x hip %s | FileCheck -check-prefixes=HIPDEV %s +// RUN: %clang_cc1 -triple x86_64 -std=c++11 \ +// RUN: -emit-llvm -o - -x hip %s | FileCheck -check-prefixes=HIPHOST %s + +struct textureReference { + int a; +}; + +template <class T, int texType, int hipTextureReadMode> +struct texture : public textureReference { 
+texture() { a = 1; } +}; + +__attribute__((device_shadow)) texture<float, 2, 1> tex; +// CUDADEV-NOT: @tex +// CUDAHOST-NOT: call i32 @__hipRegisterVar{{.*}}@tex +// HIPDEV: @tex = external addrspace(1) global %struct.texture +// HIPDEV-NOT: declare{{.*}}void @_ZN7textureIfLi2ELi1EEC1Ev +// HIPHOST: define{{.*}}@_ZN7textureIfLi2ELi1EEC1Ev +// HIPHOST: call i32 @__hipRegisterVar{{.*}}@tex{{.*}}i32 0, i32 4, i32 0, i32 0) Index: test/AST/ast-dump-cuda-device-shadow.cu =================================================================== --- /dev/null +++ test/AST/ast-dump-cuda-device-shadow.cu @@ -0,0 +1,13 @@ +// RUN: %clang_cc1 -fcuda-is-device -ast-dump -ast-dump-filter tex %s | FileCheck -strict-whitespace %s +// RUN: %clang_cc1 -ast-dump -ast-dump-filter tex %s | FileCheck -strict-whitespace %s +struct textureReference { + int a; +}; + +// CHECK: CUDADeviceShadowAttr +template <class T, int texType, int hipTextureReadMode> +struct texture : public textureReference { +texture() { a = 1; } +}; + +__attribute__((device_shadow)) texture<float, 1, 1> tex; Index: lib/Sema/SemaDeclAttr.cpp =================================================================== --- lib/Sema/SemaDeclAttr.cpp +++ lib/Sema/SemaDeclAttr.cpp @@ -6786,6 +6786,10 @@ case ParsedAttr::AT_CUDAHost: handleSimpleAttributeWithExclusions<CUDAHostAttr, CUDAGlobalAttr>(S, D, AL); break; + case ParsedAttr::AT_CUDADeviceShadow: + handleSimpleAttributeWithExclusions<CUDADeviceShadowAttr, CUDADeviceAttr>( + S, D, AL); + break; case ParsedAttr::AT_GNUInline: handleGNUInlineAttr(S, D, AL); break; Index: lib/Driver/ToolChains/HIP.cpp =================================================================== --- lib/Driver/ToolChains/HIP.cpp +++ lib/Driver/ToolChains/HIP.cpp @@ -174,9 +174,8 @@ const char *InputFileName) const { // Construct lld command. // The output from ld.lld is an HSA code object file. 
- ArgStringList LldArgs{"-flavor", "gnu", "--no-undefined", - "-shared", "-o", Output.getFilename(), - InputFileName}; + ArgStringList LldArgs{ + "-flavor", "gnu", "-shared", "-o", Output.getFilename(), InputFileName}; SmallString<128> LldPath(C.getDriver().Dir); llvm::sys::path::append(LldPath, "lld"); const char *Lld = Args.MakeArgString(LldPath); Index: lib/CodeGen/TargetInfo.cpp =================================================================== --- lib/CodeGen/TargetInfo.cpp +++ lib/CodeGen/TargetInfo.cpp @@ -7848,12 +7848,24 @@ return D->hasAttr<OpenCLKernelAttr>() || (isa<FunctionDecl>(D) && D->hasAttr<CUDAGlobalAttr>()) || (isa<VarDecl>(D) && - (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>())); + (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>() || + D->hasAttr<CUDADeviceShadowAttr>())); +} + +static bool requiresAMDGPUDefaultVisibility(const Decl *D, + llvm::GlobalValue *GV) { + if (GV->getVisibility() != llvm::GlobalValue::HiddenVisibility) + return false; + + return isa<VarDecl>(D) && D->hasAttr<CUDADeviceShadowAttr>(); } void AMDGPUTargetCodeGenInfo::setTargetAttributes( const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const { - if (requiresAMDGPUProtectedVisibility(D, GV)) { + if (requiresAMDGPUDefaultVisibility(D, GV)) { + GV->setVisibility(llvm::GlobalValue::DefaultVisibility); + GV->setDSOLocal(false); + } else if (requiresAMDGPUProtectedVisibility(D, GV)) { GV->setVisibility(llvm::GlobalValue::ProtectedVisibility); GV->setDSOLocal(true); } Index: lib/CodeGen/CodeGenModule.cpp =================================================================== --- lib/CodeGen/CodeGenModule.cpp +++ lib/CodeGen/CodeGenModule.cpp @@ -2414,7 +2414,8 @@ if (!Global->hasAttr<CUDADeviceAttr>() && !Global->hasAttr<CUDAGlobalAttr>() && !Global->hasAttr<CUDAConstantAttr>() && - !Global->hasAttr<CUDASharedAttr>()) + !Global->hasAttr<CUDASharedAttr>() && + !(LangOpts.HIP && Global->hasAttr<CUDADeviceShadowAttr>())) return; } else { 
// We need to emit host-side 'shadows' for all global @@ -3769,7 +3770,12 @@ !getLangOpts().CUDAIsDevice && (D->hasAttr<CUDAConstantAttr>() || D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDASharedAttr>()); - if (getLangOpts().CUDA && (IsCUDASharedVar || IsCUDAShadowVar)) + // Device-side shadows of initialized host-side global variables are also + // left undefined. + bool IsHIPDeviceShadowVar = getLangOpts().HIP && getLangOpts().CUDAIsDevice && + D->hasAttr<CUDADeviceShadowAttr>(); + if (getLangOpts().CUDA && + (IsCUDASharedVar || IsCUDAShadowVar || IsHIPDeviceShadowVar)) Init = llvm::UndefValue::get(getTypes().ConvertType(ASTTy)); else if (!InitExpr) { // This is a tentative definition; tentative definitions are @@ -3880,7 +3886,8 @@ // global variables become internal definitions. These have to // be internal in order to prevent name conflicts with global // host variables with the same name in a different TUs. - if (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>()) { + if (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>() || + (D->hasAttr<CUDADeviceShadowAttr>() && LangOpts.HIP)) { Linkage = llvm::GlobalValue::InternalLinkage; // Shadow variables and their properties must be registered @@ -3904,7 +3911,8 @@ } } - GV->setInitializer(Init); + if (!IsHIPDeviceShadowVar) + GV->setInitializer(Init); if (emitter) emitter->finalize(GV); // If it is safe to mark the global 'constant', do so now. Index: include/clang/Basic/AttrDocs.td =================================================================== --- include/clang/Basic/AttrDocs.td +++ include/clang/Basic/AttrDocs.td @@ -4157,3 +4157,17 @@ ``__attribute__((malloc))``. }]; } + +def DeviceShadowDocs : Documentation { + let Category = DocCatType; + let Content = [{ +The GNU style attribute __attribute__((device_shadow)) or MSVC style attribute +__declspec(__device_shadow__) can be added to the definition of a global variable +to indicate it is a HIP device shadow variable.
A device shadow variable can +be accessed on both device side and host side. It has external linkage and is +not initialized on device side. It has internal linkage and is initialized by +the initializer on host side. + +It is ignored for CUDA and other languages. + }]; +} \ No newline at end of file Index: include/clang/Basic/Attr.td =================================================================== --- include/clang/Basic/Attr.td +++ include/clang/Basic/Attr.td @@ -951,6 +951,13 @@ let Documentation = [Undocumented]; } +def CUDADeviceShadow : InheritableAttr { + let Spellings = [GNU<"device_shadow">, Declspec<"__device_shadow__">]; + let Subjects = SubjectList<[Var]>; + let LangOpts = [CUDA]; + let Documentation = [DeviceShadowDocs]; +} + def CUDADeviceBuiltin : IgnoredAttr { let Spellings = [GNU<"device_builtin">, Declspec<"__device_builtin__">]; let LangOpts = [CUDA];
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits