This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
yaxunl marked an inline comment as done.
Closed by commit rG0424b5115cff: [CUDA][HIP] Fix host used external kernel in 
archive (authored by yaxunl).
Herald added a project: clang.

Changed prior to commit:
  https://reviews.llvm.org/D123441?vs=421680&id=422519#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D123441/new/

https://reviews.llvm.org/D123441

Files:
  clang/include/clang/AST/ASTContext.h
  clang/lib/CodeGen/CodeGenModule.cpp
  clang/lib/Sema/SemaCUDA.cpp
  clang/lib/Sema/SemaExpr.cpp
  clang/test/CodeGenCUDA/host-used-extern.cu

Index: clang/test/CodeGenCUDA/host-used-extern.cu
===================================================================
--- /dev/null
+++ clang/test/CodeGenCUDA/host-used-extern.cu
@@ -0,0 +1,51 @@
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -x hip %s \
+// RUN:   -fgpu-rdc -std=c++11 -emit-llvm -o - -target-cpu gfx906 | FileCheck %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -x hip %s \
+// RUN:   -fgpu-rdc -std=c++11 -emit-llvm -o - -target-cpu gfx906 \
+// RUN:   | FileCheck -check-prefix=NEG %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -x hip %s \
+// RUN:   -std=c++11 -emit-llvm -o - -target-cpu gfx906 \
+// RUN:   | FileCheck -check-prefixes=NEG,NORDC %s
+
+#include "Inputs/cuda.h"
+
+// CHECK-LABEL: @gpu.used.external = appending {{.*}}global
+// CHECK-DAG: @_Z7kernel1v
+// CHECK-DAG: @_Z7kernel4v
+// CHECK-DAG: @var1
+// CHECK-LABEL: @llvm.compiler.used = {{.*}} @gpu.used.external
+
+// NEG-NOT: @gpu.used.external = {{.*}} @_Z7kernel2v
+// NEG-NOT: @gpu.used.external = {{.*}} @_Z7kernel3v
+// NEG-NOT: @gpu.used.external = {{.*}} @var2
+// NEG-NOT: @gpu.used.external = {{.*}} @var3
+// NORDC-NOT: @gpu.used.external = {{.*}} @_Z7kernel1v
+// NORDC-NOT: @gpu.used.external = {{.*}} @_Z7kernel4v
+// NORDC-NOT: @gpu.used.external = {{.*}} @var1
+
+__global__ void kernel1();
+
+// kernel2 is not marked as used since it is a definition.
+__global__ void kernel2() {}
+
+// kernel3 is not marked as used since it is not called by host function.
+__global__ void kernel3();
+
+// kernel4 is marked as used even though it is not called.
+__global__ void kernel4();
+
+extern __device__ int var1;
+
+__device__ int var2;
+
+extern __device__ int var3;
+
+void use(int *p);
+
+void test() {
+  kernel1<<<1, 1>>>();
+  void *p = (void*)kernel4;
+  use(&var1);
+}
Index: clang/lib/Sema/SemaExpr.cpp
===================================================================
--- clang/lib/Sema/SemaExpr.cpp
+++ clang/lib/Sema/SemaExpr.cpp
@@ -17908,8 +17908,7 @@
       }
     } else if (VarTarget == Sema::CVT_Device &&
                (UserTarget == Sema::CFT_Host ||
-                UserTarget == Sema::CFT_HostDevice) &&
-               !Var->hasExternalStorage()) {
+                UserTarget == Sema::CFT_HostDevice)) {
       // Record a CUDA/HIP device side variable if it is ODR-used
       // by host code. This is done conservatively, when the variable is
       // referenced in any of the following contexts:
@@ -17920,7 +17919,10 @@
       // be visible in the device compilation for the compiler to be able to
       // emit template variables instantiated by host code only and to
       // externalize the static device side variable ODR-used by host code.
-      SemaRef.getASTContext().CUDADeviceVarODRUsedByHost.insert(Var);
+      if (!Var->hasExternalStorage())
+        SemaRef.getASTContext().CUDADeviceVarODRUsedByHost.insert(Var);
+      else if (SemaRef.LangOpts.GPURelocatableDeviceCode)
+        SemaRef.getASTContext().CUDAExternalDeviceDeclODRUsedByHost.insert(Var);
     }
   }
 
Index: clang/lib/Sema/SemaCUDA.cpp
===================================================================
--- clang/lib/Sema/SemaCUDA.cpp
+++ clang/lib/Sema/SemaCUDA.cpp
@@ -819,8 +819,13 @@
     }
   }();
 
-  if (DiagKind == SemaDiagnosticBuilder::K_Nop)
+  if (DiagKind == SemaDiagnosticBuilder::K_Nop) {
+    // For -fgpu-rdc, keep track of external kernels used by host functions.
+    if (LangOpts.CUDAIsDevice && LangOpts.GPURelocatableDeviceCode &&
+        Callee->hasAttr<CUDAGlobalAttr>() && !Callee->isDefined())
+      getASTContext().CUDAExternalDeviceDeclODRUsedByHost.insert(Callee);
     return true;
+  }
 
   // Avoid emitting this error twice for the same location.  Using a hashtable
   // like this is unfortunate, but because we must continue parsing as normal
Index: clang/lib/CodeGen/CodeGenModule.cpp
===================================================================
--- clang/lib/CodeGen/CodeGenModule.cpp
+++ clang/lib/CodeGen/CodeGenModule.cpp
@@ -579,6 +579,30 @@
     }
   }
 
+  // Emit a global array containing all external kernels or device variables
+  // used by host functions and mark it as used for CUDA/HIP. This is necessary
+  // to get kernels or device variables in archives linked in even if these
+  // kernels or device variables are only used in host functions.
+  if (!Context.CUDAExternalDeviceDeclODRUsedByHost.empty()) {
+    SmallVector<llvm::Constant *, 8> UsedArray;
+    for (auto D : Context.CUDAExternalDeviceDeclODRUsedByHost) {
+      GlobalDecl GD;
+      if (auto *FD = dyn_cast<FunctionDecl>(D))
+        GD = GlobalDecl(FD, KernelReferenceKind::Kernel);
+      else
+        GD = GlobalDecl(D);
+      UsedArray.push_back(llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(
+          GetAddrOfGlobal(GD), Int8PtrTy));
+    }
+
+    llvm::ArrayType *ATy = llvm::ArrayType::get(Int8PtrTy, UsedArray.size());
+
+    auto *GV = new llvm::GlobalVariable(
+        getModule(), ATy, false, llvm::GlobalValue::AppendingLinkage,
+        llvm::ConstantArray::get(ATy, UsedArray), "gpu.used.external");
+    addCompilerUsedGlobal(GV);
+  }
+
   emitLLVMUsed();
   if (SanStats)
     SanStats->finish();
Index: clang/include/clang/AST/ASTContext.h
===================================================================
--- clang/include/clang/AST/ASTContext.h
+++ clang/include/clang/AST/ASTContext.h
@@ -1160,6 +1160,10 @@
   /// Keep track of CUDA/HIP device-side variables ODR-used by host code.
   llvm::DenseSet<const VarDecl *> CUDADeviceVarODRUsedByHost;
 
+  /// Keep track of CUDA/HIP external kernels or device variables ODR-used by
+  /// host code.
+  llvm::DenseSet<const ValueDecl *> CUDAExternalDeviceDeclODRUsedByHost;
+
   ASTContext(LangOptions &LOpts, SourceManager &SM, IdentifierTable &idents,
              SelectorTable &sels, Builtin::Context &builtins,
              TranslationUnitKind TUKind);
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to