yaxunl created this revision.
yaxunl added reviewers: tra, rjmccall, JonChesterfield, hliao.
Herald added a subscriber: dang.
yaxunl requested review of this revision.

This is separated from https://reviews.llvm.org/D80858

For -fgpu-rdc mode, static device vars in different TU's may have the same name.
To support accessing file-scope static device variables in host code, we need 
to give them
a distinct name and external linkage. This can be done by postfixing each 
static device variable with
a distinct CUID (Compilation Unit ID). Also we have to make sure the host 
compilation and device
compilation of the same compilation unit use identical CUID.

This patch added a distinct CUID for each input file, which is represented by 
InputAction.
clang initially creates an InputAction for each input file for the host 
compilation. In CUDA/HIP action
builder, each InputAction is given a CUID and cloned for each GPU arch, and the 
CUID is also cloned. In this way,
we guarantee the corresponding device and host compilation for the same file 
shared the
same CUID, therefore the postfixed device variable and shadow variable share 
the same name.
On the other hand, different compilation units have different CUID, therefore a 
static variable
with the same name but in a different compilation unit will have a different 
name.

Since the static device variables have different name across compilation units, 
now we let
them have external linkage so that they can be looked up by the runtime.

-fuse-cuid=random|hash|none is added to control the method to generate CUID. 
The default
is hash. -cuid=X is also added to specify CUID explicitly, which overrides 
-fuse-cuid.


https://reviews.llvm.org/D85223

Files:
  clang/include/clang/AST/ASTContext.h
  clang/include/clang/Basic/DiagnosticDriverKinds.td
  clang/include/clang/Basic/LangOptions.h
  clang/include/clang/Driver/Action.h
  clang/include/clang/Driver/Compilation.h
  clang/include/clang/Driver/Options.td
  clang/lib/AST/ASTContext.cpp
  clang/lib/CodeGen/CGCUDANV.cpp
  clang/lib/CodeGen/CodeGenModule.cpp
  clang/lib/CodeGen/CodeGenModule.h
  clang/lib/Driver/Action.cpp
  clang/lib/Driver/Driver.cpp
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/lib/Frontend/CompilerInvocation.cpp
  clang/test/CodeGenCUDA/static-device-var-rdc.cu
  clang/test/Driver/hip-cuid.hip
  clang/test/Frontend/hip-cuid.hip
  clang/test/SemaCUDA/static-device-var.cu

Index: clang/test/SemaCUDA/static-device-var.cu
===================================================================
--- /dev/null
+++ clang/test/SemaCUDA/static-device-var.cu
@@ -0,0 +1,37 @@
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang_cc1 -triple nvptx -fcuda-is-device \
+// RUN:    -emit-llvm -o - %s -fsyntax-only -verify
+
+// RUN: %clang_cc1 -triple x86_64-gnu-linux \
+// RUN:    -emit-llvm -o - %s -fsyntax-only -verify
+
+#include "Inputs/cuda.h"
+
+__device__ void f1() {
+  const static int b = 123;
+  static int a;
+  // expected-error@-1 {{within a __device__ function, only __shared__ variables or const variables without device memory qualifier may be marked 'static'}}
+}
+
+__global__ void k1() {
+  const static int b = 123;
+  static int a;
+  // expected-error@-1 {{within a __global__ function, only __shared__ variables or const variables without device memory qualifier may be marked 'static'}}
+}
+
+static __device__ int x;
+static __constant__ int y;
+
+__global__ void kernel(int *a) {
+  a[0] = x;
+  a[1] = y;
+}
+
+int* getDeviceSymbol(int *x);
+
+void foo() {
+  getDeviceSymbol(&x);
+  getDeviceSymbol(&y);
+}
Index: clang/test/Frontend/hip-cuid.hip
===================================================================
--- /dev/null
+++ clang/test/Frontend/hip-cuid.hip
@@ -0,0 +1,6 @@
+// RUN: not %clang_cc1 -cuid=abc-123 -offload-arch=gfx906 %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=INVALID %s
+
+// INVALID: invalid value 'abc-123' in '-cuid=abc-123' (alphanumeric characters and underscore only)
+
+// RUN: %clang_cc1 -cuid=abc_123 -offload-arch=gfx906 %s
Index: clang/test/Driver/hip-cuid.hip
===================================================================
--- /dev/null
+++ clang/test/Driver/hip-cuid.hip
@@ -0,0 +1,130 @@
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+
+// Check invalid -fuse-cuid= option.
+
+// RUN: not %clang -### -x hip \
+// RUN:   -target x86_64-unknown-linux-gnu \
+// RUN:   --offload-arch=gfx900 \
+// RUN:   --offload-arch=gfx906 \
+// RUN:   -c -nogpulib -fuse-cuid=invalid \
+// RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
+// RUN:   %S/Inputs/hip_multiple_inputs/b.hip \
+// RUN: 2>&1 | FileCheck -check-prefixes=INVALID %s
+
+// INVALID: invalid value 'invalid' in '-fuse-cuid=invalid'
+
+// Check random CUID generator.
+
+// RUN: %clang -### -x hip \
+// RUN:   -target x86_64-unknown-linux-gnu \
+// RUN:   --offload-arch=gfx900 \
+// RUN:   --offload-arch=gfx906 \
+// RUN:   -c -nogpulib -fuse-cuid=random \
+// RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
+// RUN:   %S/Inputs/hip_multiple_inputs/b.hip \
+// RUN: 2>&1 | FileCheck -check-prefixes=COMMON,HEX %s
+
+// Check fixed CUID.
+
+// RUN: %clang -### -x hip \
+// RUN:   -target x86_64-unknown-linux-gnu \
+// RUN:   --offload-arch=gfx900 \
+// RUN:   --offload-arch=gfx906 \
+// RUN:   -c -nogpulib -cuid=xyz_123 \
+// RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
+// RUN:   %S/Inputs/hip_multiple_inputs/b.hip \
+// RUN: 2>&1 | FileCheck -check-prefixes=COMMON,FIXED %s
+
+// Check fixed CUID override -fuse-cuid.
+
+// RUN: %clang -### -x hip \
+// RUN:   -target x86_64-unknown-linux-gnu \
+// RUN:   --offload-arch=gfx900 \
+// RUN:   --offload-arch=gfx906 \
+// RUN:   -c -nogpulib -fuse-cuid=random -cuid=xyz_123 \
+// RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
+// RUN:   %S/Inputs/hip_multiple_inputs/b.hip \
+// RUN: 2>&1 | FileCheck -check-prefixes=COMMON,FIXED %s
+
+// Check hash CUID generator.
+
+// RUN: %clang -### -x hip \
+// RUN:   -target x86_64-unknown-linux-gnu \
+// RUN:   --offload-arch=gfx900 \
+// RUN:   --offload-arch=gfx906 \
+// RUN:   -c -nogpulib -fuse-cuid=hash \
+// RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
+// RUN:   %S/Inputs/hip_multiple_inputs/b.hip \
+// RUN: 2>&1 | FileCheck -check-prefixes=COMMON,HEX %s
+
+// COMMON: "{{.*}}clang{{.*}}" "-cc1" "-triple" "amdgcn-amd-amdhsa"
+// COMMON-SAME: "-target-cpu" "gfx900"
+// HEX-SAME: "-cuid=[[CUID:[0-9a-f]+]]"
+// FIXED-SAME: "-cuid=[[CUID:xyz_123]]"
+// COMMON-SAME: "{{.*}}a.cu"
+
+// COMMON: "{{.*}}clang{{.*}}" "-cc1" "-triple" "amdgcn-amd-amdhsa"
+// COMMON-SAME: "-target-cpu" "gfx906"
+// COMMON-SAME: "-cuid=[[CUID]]"
+// COMMON-SAME: "{{.*}}a.cu"
+
+// COMMON: "{{.*}}clang{{.*}}" "-cc1" "-triple" "x86_64-unknown-linux-gnu"
+// COMMON-SAME: "-cuid=[[CUID]]"
+// COMMON-SAME: "{{.*}}a.cu"
+
+// COMMON: "{{.*}}clang{{.*}}" "-cc1" "-triple" "amdgcn-amd-amdhsa"
+// COMMON-SAME: "-target-cpu" "gfx900"
+// HEX-NOT: "-cuid=[[CUID]]"
+// HEX-SAME: "-cuid=[[CUID2:[0-9a-f]+]]"
+// FIXED-SAME: "-cuid=[[CUID2:xyz_123]]"
+// COMMON-SAME: "{{.*}}b.hip"
+
+// COMMON: "{{.*}}clang{{.*}}" "-cc1" "-triple" "amdgcn-amd-amdhsa"
+// COMMON-SAME: "-target-cpu" "gfx906"
+// HEX-NOT: "-cuid=[[CUID]]"
+// COMMON-SAME: "-cuid=[[CUID2]]"
+// COMMON-SAME: "{{.*}}b.hip"
+
+// COMMON: "{{.*}}clang{{.*}}" "-cc1" "-triple" "x86_64-unknown-linux-gnu"
+// HEX-NOT: "-cuid=[[CUID]]"
+// COMMON-SAME: "-cuid=[[CUID2]]"
+// COMMON-SAME: "{{.*}}b.hip"
+
+// Check CUID generated by hash.
+// The same CUID is generated for the same file with the same options.
+
+// RUN: rm -rf %t.out
+
+// RUN: %clang -### -x hip -target x86_64-unknown-linux-gnu \
+// RUN:   --offload-arch=gfx906 -c -nogpulib -fuse-cuid=hash \
+// RUN:   %S/Inputs/hip_multiple_inputs/a.cu >%t.out 2>&1
+
+// RUN: %clang -### -x hip -target x86_64-unknown-linux-gnu \
+// RUN:   --offload-arch=gfx906 -c -nogpulib -fuse-cuid=hash \
+// RUN:   %S/Inputs/hip_multiple_inputs/a.cu >>%t.out 2>&1
+
+// RUN: FileCheck %s -check-prefixes=HASH -input-file %t.out
+
+// HASH: "{{.*}}clang{{.*}}" {{.*}} "-target-cpu" "gfx906" {{.*}}"-cuid=[[CUID:[0-9a-f]+]]"
+// HASH: "{{.*}}clang{{.*}}" {{.*}} "-target-cpu" "gfx906" {{.*}}"-cuid=[[CUID]]"
+
+
+// Check CUID generated by hash.
+// Different CUID's are generated for the same file with different options.
+
+// RUN: rm -rf %t.out
+
+// RUN: %clang -### -x hip -target x86_64-unknown-linux-gnu -DX=1 \
+// RUN:   --offload-arch=gfx906 -c -nogpulib -fuse-cuid=hash \
+// RUN:   %S/Inputs/hip_multiple_inputs/a.cu >%t.out 2>&1
+
+// RUN: %clang -### -x hip -target x86_64-unknown-linux-gnu -DX=2 \
+// RUN:   --offload-arch=gfx906 -c -nogpulib -fuse-cuid=hash \
+// RUN:   %S/Inputs/../Inputs/hip_multiple_inputs/a.cu >>%t.out 2>&1
+
+// RUN: FileCheck %s -check-prefixes=HASH2 -input-file %t.out
+
+// HASH2: "{{.*}}clang{{.*}}" {{.*}} "-target-cpu" "gfx906" {{.*}}"-cuid=[[CUID:[0-9a-f]+]]"
+// HASH2-NOT: "{{.*}}clang{{.*}}" {{.*}} "-target-cpu" "gfx906" {{.*}}"-cuid=[[CUID]]"
Index: clang/test/CodeGenCUDA/static-device-var-rdc.cu
===================================================================
--- /dev/null
+++ clang/test/CodeGenCUDA/static-device-var-rdc.cu
@@ -0,0 +1,89 @@
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \
+// RUN:   -fgpu-rdc -emit-llvm -o - -x hip %s | FileCheck \
+// RUN:   -check-prefixes=DEV,INT-DEV %s
+
+// RUN: %clang_cc1 -triple x86_64-gnu-linux \
+// RUN:   -fgpu-rdc -emit-llvm -o - -x hip %s | FileCheck \
+// RUN:   -check-prefixes=HOST,INT-HOST %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -cuid=123abc \
+// RUN:   -fgpu-rdc -emit-llvm -o - -x hip %s | FileCheck \
+// RUN:   -check-prefixes=DEV,EXT-DEV %s
+
+// RUN: %clang_cc1 -triple x86_64-gnu-linux -cuid=123abc \
+// RUN:   -fgpu-rdc -emit-llvm -o - -x hip %s | FileCheck \
+// RUN:   -check-prefixes=HOST,EXT-HOST %s
+
+#include "Inputs/cuda.h"
+
+// Test function scope static device variable, which should not be externalized.
+// DEV-DAG: @_ZZ6kernelPiPPKiE1w = internal addrspace(4) constant i32 1
+
+
+// HOST-DAG: @_ZL1x = internal global i32 undef
+// HOST-DAG: @_ZL1y = internal global i32 undef
+
+// Test normal static device variables
+// INT-DEV-DAG: @_ZL1x = internal addrspace(1) global i32 0
+// INT-HOST-DAG: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x\00"
+
+// Test externalized static device variables
+// EXT-DEV-DAG: @_ZL1x.static.123abc = addrspace(1) externally_initialized global i32 0
+// EXT-HOST-DAG: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x.static.123abc\00"
+
+static __device__ int x;
+
+// Test static device variables not used by host code should not be externalized
+// DEV-DAG: @_ZL2x2 = internal addrspace(1) global i32 0
+
+static __device__ int x2;
+
+// Test normal static device variables
+// INT-DEV-DAG: @_ZL1y = internal addrspace(4) global i32 0
+// INT-HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y\00"
+
+// Test externalized static device variables
+// EXT-DEV-DAG: @_ZL1y.static.123abc = addrspace(4) externally_initialized global i32 0
+// EXT-HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y.static.123abc\00"
+
+static __constant__ int y;
+
+// Test static host variable, which should not be externalized nor registered.
+// HOST-DAG: @_ZL1z = internal global i32 0
+// DEV-NOT: @_ZL1z
+static int z;
+
+// Test static device variable in inline function, which should not be
+// externalized nor registered.
+// DEV-DAG: @_ZZ6devfunPPKiE1p = linkonce_odr addrspace(4) constant i32 2, comdat
+
+inline __device__ void devfun(const int ** b) {
+  const static int p = 2;
+  b[0] = &p;
+}
+
+__global__ void kernel(int *a, const int **b) {
+  const static int w = 1;
+  a[0] = x;
+  a[1] = y;
+  b[0] = &w;
+  b[1] = &x2;
+  devfun(b);
+}
+
+int* getDeviceSymbol(int *x);
+
+void foo() {
+  getDeviceSymbol(&x);
+  getDeviceSymbol(&y);
+  z = 123;
+}
+
+// HOST: __hipRegisterVar({{.*}}@_ZL1x {{.*}}@[[DEVNAMEX]]
+// HOST: __hipRegisterVar({{.*}}@_ZL1y {{.*}}@[[DEVNAMEY]]
+// HOST-NOT: __hipRegisterVar({{.*}}@_ZL2x2
+// HOST-NOT: __hipRegisterVar({{.*}}@_ZZ6kernelPiPPKiE1w
+// HOST-NOT: __hipRegisterVar({{.*}}@_ZZ6devfunPPKiE1p
Index: clang/lib/Frontend/CompilerInvocation.cpp
===================================================================
--- clang/lib/Frontend/CompilerInvocation.cpp
+++ clang/lib/Frontend/CompilerInvocation.cpp
@@ -2628,6 +2628,21 @@
           << Args.getLastArg(OPT_fgpu_allow_device_init)->getAsString(Args);
   }
   Opts.HIPUseNewLaunchAPI = Args.hasArg(OPT_fhip_new_launch_api);
+
+  // Only alphanumeric and underscore is allowed in -cuid option.
+  if (auto *A = Args.getLastArg(OPT_cuid_EQ)) {
+    const char *V = A->getValue();
+    bool IsValid = true;
+    for (const char *P = V; *P; ++P) {
+      if (!std::isalnum(*P) && *P != '_') {
+        Diags.Report(diag::err_drv_invalid_cuid) << A->getAsString(Args) << V;
+        IsValid = false;
+        break;
+      }
+    }
+    if (IsValid)
+      Opts.CUID = std::string(V);
+  }
   if (Opts.HIP)
     Opts.GPUMaxThreadsPerBlock = getLastArgIntValue(
         Args, OPT_gpu_max_threads_per_block_EQ, Opts.GPUMaxThreadsPerBlock);
Index: clang/lib/Driver/ToolChains/Clang.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -6021,6 +6021,18 @@
       CmdArgs.push_back("-fcuda-short-ptr");
   }
 
+  if (IsCuda || IsHIP) {
+    // Determine the original source input.
+    const Action *SourceAction = &JA;
+    while (SourceAction->getKind() != Action::InputClass) {
+      assert(!SourceAction->getInputs().empty() && "unexpected root action!");
+      SourceAction = SourceAction->getInputs()[0];
+    }
+    auto CUID = cast<InputAction>(SourceAction)->getId();
+    if (!CUID.empty())
+      CmdArgs.push_back(Args.MakeArgString(Twine("-cuid=") + Twine(CUID)));
+  }
+
   if (IsHIP)
     CmdArgs.push_back("-fcuda-allow-variadic-functions");
 
Index: clang/lib/Driver/Driver.cpp
===================================================================
--- clang/lib/Driver/Driver.cpp
+++ clang/lib/Driver/Driver.cpp
@@ -73,6 +73,7 @@
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/Host.h"
+#include "llvm/Support/MD5.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/PrettyStackTrace.h"
 #include "llvm/Support/Process.h"
@@ -2408,6 +2409,14 @@
     /// Default GPU architecture if there's no one specified.
     CudaArch DefaultCudaArch = CudaArch::UNKNOWN;
 
+    /// Method to generate compilation unit ID specified by option
+    /// '-fuse-cuid='.
+    enum UseCUIDKind { CUID_Hash, CUID_Random, CUID_None, CUID_Invalid };
+    UseCUIDKind UseCUID = CUID_Hash;
+
+    /// Compilation unit ID specified by option '-cuid='.
+    StringRef FixedCUID;
+
   public:
     CudaActionBuilderBase(Compilation &C, DerivedArgList &Args,
                           const Driver::InputList &Inputs,
@@ -2443,9 +2452,32 @@
         // Replicate inputs for each GPU architecture.
         auto Ty = IA->getType() == types::TY_HIP ? types::TY_HIP_DEVICE
                                                  : types::TY_CUDA_DEVICE;
+        std::string CUID = FixedCUID.str();
+        if (CUID.empty()) {
+          if (UseCUID == CUID_Random)
+            CUID = llvm::utohexstr(llvm::sys::Process::GetRandomNumber(),
+                                   /*LowerCase=*/true);
+          else if (UseCUID == CUID_Hash) {
+            llvm::MD5 Hasher;
+            llvm::MD5::MD5Result Hash;
+            SmallString<256> RealPath;
+            llvm::sys::fs::real_path(IA->getInputArg().getValue(), RealPath,
+                                     /*expand_tilde=*/true);
+            Hasher.update(RealPath);
+            for (auto *A : Args) {
+              if (A->getOption().matches(options::OPT_INPUT))
+                continue;
+              Hasher.update(A->getAsString(Args));
+            }
+            Hasher.final(Hash);
+            CUID = llvm::utohexstr(Hash.low(), /*LowerCase=*/true);
+          }
+        }
+        IA->setId(CUID);
+
         for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) {
           CudaDeviceActions.push_back(
-              C.MakeAction<InputAction>(IA->getInputArg(), Ty));
+              C.MakeAction<InputAction>(IA->getInputArg(), Ty, IA->getId()));
         }
 
         return ABRT_Success;
@@ -2561,6 +2593,21 @@
                               options::OPT_cuda_device_only);
       EmitLLVM = Args.getLastArg(options::OPT_emit_llvm);
       EmitAsm = Args.getLastArg(options::OPT_S);
+      FixedCUID = Args.getLastArgValue(options::OPT_cuid_EQ);
+      if (Arg *A = Args.getLastArg(options::OPT_fuse_cuid_EQ)) {
+        StringRef UseCUIDStr = A->getValue();
+        UseCUID = llvm::StringSwitch<UseCUIDKind>(UseCUIDStr)
+                      .Case("hash", CUID_Hash)
+                      .Case("random", CUID_Random)
+                      .Case("none", CUID_None)
+                      .Default(CUID_Invalid);
+        if (UseCUID == CUID_Invalid) {
+          C.getDriver().Diag(diag::err_drv_invalid_value)
+              << A->getAsString(Args) << UseCUIDStr;
+          C.setContainsError();
+          return true;
+        }
+      }
 
       // Collect all cuda_gpu_arch parameters, removing duplicates.
       std::set<CudaArch> GpuArchs;
Index: clang/lib/Driver/Action.cpp
===================================================================
--- clang/lib/Driver/Action.cpp
+++ clang/lib/Driver/Action.cpp
@@ -165,8 +165,8 @@
 
 void InputAction::anchor() {}
 
-InputAction::InputAction(const Arg &_Input, types::ID _Type)
-    : Action(InputClass, _Type), Input(_Input) {}
+InputAction::InputAction(const Arg &_Input, types::ID _Type, StringRef _Id)
+    : Action(InputClass, _Type), Input(_Input), Id(_Id.str()) {}
 
 void BindArchAction::anchor() {}
 
Index: clang/lib/CodeGen/CodeGenModule.h
===================================================================
--- clang/lib/CodeGen/CodeGenModule.h
+++ clang/lib/CodeGen/CodeGenModule.h
@@ -1406,6 +1406,10 @@
                                            TBAAAccessInfo *TBAAInfo = nullptr);
   bool stopAutoInit();
 
+  /// Print the postfix for externalized static variable for single source
+  /// offloading languages CUDA and HIP.
+  void printPostfixForExternalizedStaticVar(llvm::raw_ostream &OS) const;
+
 private:
   llvm::Constant *GetOrCreateLLVMFunction(
       StringRef MangledName, llvm::Type *Ty, GlobalDecl D, bool ForVTable,
Index: clang/lib/CodeGen/CodeGenModule.cpp
===================================================================
--- clang/lib/CodeGen/CodeGenModule.cpp
+++ clang/lib/CodeGen/CodeGenModule.cpp
@@ -1083,6 +1083,11 @@
       }
     }
 
+  // Make unique name for device side static file-scope variable for HIP.
+  if (CGM.getContext().shouldExternalizeStaticVar(ND) &&
+      CGM.getLangOpts().GPURelocatableDeviceCode &&
+      CGM.getLangOpts().CUDAIsDevice)
+    CGM.printPostfixForExternalizedStaticVar(Out);
   return std::string(Out.str());
 }
 
@@ -1140,9 +1145,16 @@
     }
   }
 
-  auto FoundName = MangledDeclNames.find(CanonicalGD);
-  if (FoundName != MangledDeclNames.end())
-    return FoundName->second;
+  // In CUDA/HIP device compilation with -fgpu-rdc, the mangled name of a
+  // static device variable depends on whether the variable is referenced by
+  // a host or device host function. Therefore the mangled name cannot be
+  // cached.
+  if (!LangOpts.CUDAIsDevice ||
+      !getContext().mayExternalizeStaticVar(GD.getDecl())) {
+    auto FoundName = MangledDeclNames.find(CanonicalGD);
+    if (FoundName != MangledDeclNames.end())
+      return FoundName->second;
+  }
 
   // Keep the first result in the case of a mangling collision.
   const auto *ND = cast<NamedDecl>(GD.getDecl());
@@ -2653,7 +2665,15 @@
     DelayedCXXInitPosition[Global] = CXXGlobalInits.size();
     CXXGlobalInits.push_back(nullptr);
   }
-
+#if 0
+  // We need to decide whether to externalize a static variable after checking
+  // whether it is referenced in host code.
+  if (isa<VarDecl>(Global) && getContext().mayExternalizeStaticVar(
+      cast<VarDecl>(Global))) {
+    addDeferredDeclToEmit(GD);
+    return;
+  }
+#endif
   StringRef MangledName = getMangledName(GD);
   if (GetGlobalValue(MangledName) != nullptr) {
     // The value has already been used and should therefore be emitted.
@@ -6055,3 +6075,8 @@
   }
   return false;
 }
+
+void CodeGenModule::printPostfixForExternalizedStaticVar(
+    llvm::raw_ostream &OS) const {
+  OS << ".static." << getLangOpts().CUID;
+}
Index: clang/lib/CodeGen/CGCUDANV.cpp
===================================================================
--- clang/lib/CodeGen/CGCUDANV.cpp
+++ clang/lib/CodeGen/CGCUDANV.cpp
@@ -234,6 +234,16 @@
     DeviceSideName = std::string(Out.str());
   } else
     DeviceSideName = std::string(ND->getIdentifier()->getName());
+
+  // Make unique name for device side static file-scope variable for HIP.
+  if (CGM.getContext().shouldExternalizeStaticVar(ND) &&
+      CGM.getLangOpts().GPURelocatableDeviceCode) {
+    SmallString<256> Buffer;
+    llvm::raw_svector_ostream Out(Buffer);
+    Out << DeviceSideName;
+    CGM.printPostfixForExternalizedStaticVar(Out);
+    DeviceSideName = std::string(Out.str());
+  }
   return DeviceSideName;
 }
 
Index: clang/lib/AST/ASTContext.cpp
===================================================================
--- clang/lib/AST/ASTContext.cpp
+++ clang/lib/AST/ASTContext.cpp
@@ -10281,7 +10281,10 @@
       return GVA_StrongODR;
     // Single source offloading languages like CUDA/HIP need to be able to
     // access static device variables from host code of the same compilation
-    // unit. This is done by externalizing the static variable.
+    // unit. This is done by externalizing the static variable with a shared
+    // name between the host and device compilation which is the same for the
+    // same compilation unit whereas different among different compilation
+    // units.
     if (Context.shouldExternalizeStaticVar(D))
       return GVA_StrongExternal;
   }
@@ -11170,10 +11173,15 @@
   return DB << "a prior #pragma section";
 }
 
-bool ASTContext::shouldExternalizeStaticVar(const Decl *D) const {
-  return !getLangOpts().GPURelocatableDeviceCode &&
+bool ASTContext::mayExternalizeStaticVar(const Decl *D) const {
+  return (!getLangOpts().CUID.empty() ||
+          !getLangOpts().GPURelocatableDeviceCode) &&
          (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>()) &&
          isa<VarDecl>(D) && cast<VarDecl>(D)->isFileVarDecl() &&
-         cast<VarDecl>(D)->getStorageClass() == SC_Static &&
+         cast<VarDecl>(D)->getStorageClass() == SC_Static;
+}
+
+bool ASTContext::shouldExternalizeStaticVar(const Decl *D) const {
+  return mayExternalizeStaticVar(D) &&
          CUDAStaticDeviceVarReferencedByHost.count(cast<VarDecl>(D));
 }
Index: clang/include/clang/Driver/Options.td
===================================================================
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -653,6 +653,18 @@
 def gpu_max_threads_per_block_EQ : Joined<["--"], "gpu-max-threads-per-block=">,
   Flags<[CC1Option]>,
   HelpText<"Default max threads per block for kernel launch bounds for HIP">;
+def cuid_EQ : Joined<["-"], "cuid=">, Flags<[CC1Option]>,
+  HelpText<"An ID for compilation unit, which should be the same for the same "
+           "compilation unit but different for different compilation units. "
+           "It is used to externalize device-side static variables for single "
+           "source offloading languages CUDA and HIP so that they can be "
+           "accessed by the host code of the same compilation unit.">;
+def fuse_cuid_EQ : Joined<["-"], "fuse-cuid=">,
+  HelpText<"Method to generate ID's for compilation units for single source "
+           "offloading languages CUDA and HIP: 'hash' (ID's generated by hashing "
+           "file path and command line options) | 'random' (ID's generated as "
+           "random numbers) | 'none' (disabled). Default is 'hash'. This option "
+           "will be overriden by option '-cuid=[ID]' if it is specified." >;
 def libomptarget_nvptx_path_EQ : Joined<["--"], "libomptarget-nvptx-path=">, Group<i_Group>,
   HelpText<"Path to libomptarget-nvptx libraries">;
 def dD : Flag<["-"], "dD">, Group<d_Group>, Flags<[CC1Option]>,
Index: clang/include/clang/Driver/Compilation.h
===================================================================
--- clang/include/clang/Driver/Compilation.h
+++ clang/include/clang/Driver/Compilation.h
@@ -297,6 +297,8 @@
   /// Return whether an error during the parsing of the input args.
   bool containsError() const { return ContainsError; }
 
+  void setContainsError() { ContainsError = true; }
+
   /// Redirect - Redirect output of this compilation. Can only be done once.
   ///
   /// \param Redirects - array of optional paths. The array should have a size
Index: clang/include/clang/Driver/Action.h
===================================================================
--- clang/include/clang/Driver/Action.h
+++ clang/include/clang/Driver/Action.h
@@ -214,14 +214,18 @@
 
 class InputAction : public Action {
   const llvm::opt::Arg &Input;
-
+  std::string Id;
   virtual void anchor();
 
 public:
-  InputAction(const llvm::opt::Arg &Input, types::ID Type);
+  InputAction(const llvm::opt::Arg &Input, types::ID Type,
+              StringRef Id = StringRef());
 
   const llvm::opt::Arg &getInputArg() const { return Input; }
 
+  void setId(StringRef _Id) { Id = _Id.str(); }
+  StringRef getId() const { return Id; }
+
   static bool classof(const Action *A) {
     return A->getKind() == InputClass;
   }
Index: clang/include/clang/Basic/LangOptions.h
===================================================================
--- clang/include/clang/Basic/LangOptions.h
+++ clang/include/clang/Basic/LangOptions.h
@@ -293,6 +293,12 @@
   /// host code generation.
   std::string OMPHostIRFile;
 
+  /// The user provided compilation unit ID, if non-empty. This is used to
+  /// externalize static variables which is needed to support accessing static
+  /// device variables in host code for single source offloading languages
+  /// like CUDA/HIP.
+  std::string CUID;
+
   /// Indicates whether the front-end is explicitly told that the
   /// input is a header file (i.e. -x c-header).
   bool IsHeaderFile = false;
Index: clang/include/clang/Basic/DiagnosticDriverKinds.td
===================================================================
--- clang/include/clang/Basic/DiagnosticDriverKinds.td
+++ clang/include/clang/Basic/DiagnosticDriverKinds.td
@@ -208,6 +208,8 @@
 
 def err_drv_invalid_value : Error<"invalid value '%1' in '%0'">;
 def err_drv_invalid_int_value : Error<"invalid integral value '%1' in '%0'">;
+def err_drv_invalid_cuid : Error<"invalid value '%1' in '%0' (alphanumeric characters "
+  " and underscore only)">;
 def err_drv_invalid_remap_file : Error<
     "invalid option '%0' not of the form <from-file>;<to-file>">;
 def err_drv_invalid_gcc_output_type : Error<
Index: clang/include/clang/AST/ASTContext.h
===================================================================
--- clang/include/clang/AST/ASTContext.h
+++ clang/include/clang/AST/ASTContext.h
@@ -3022,6 +3022,9 @@
   /// Return a new OMPTraitInfo object owned by this context.
   OMPTraitInfo &getNewOMPTraitInfo();
 
+  /// Whether a C++ static variable may be externalized.
+  bool mayExternalizeStaticVar(const Decl *D) const;
+
   /// Whether a C++ static variable should be externalized.
   bool shouldExternalizeStaticVar(const Decl *D) const;
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to