psalz created this revision.
psalz added reviewers: hliao, tra, aheejin.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

NOTE: This is a work in progress and mainly intended to highlight the issue - 
i.e., I'm not certain the provided solution is appropriate.

Given this CUDA program

  template<typename Lambda>
  __global__ void run_this(Lambda lambda) {
      lambda();
  }
  
  template<typename T>
  struct remove_reference {
      using type = T;
  };
  
  template<typename T>
  struct remove_reference<T&> {
      using type = T;
  };
  
  template<typename T>
  constexpr typename remove_reference<T>::type&& move(T&& t) {
      return static_cast<typename remove_reference<T>::type&&>(t);
  }
  
  int main() {
      auto foo = move([](){});
      run_this<<<1, 1, 1>>>([]() __device__ { printf("Hello World\n"); }); 
      return 0;
  }

the assertion at the top of `CGNVCUDARuntime::emitDeviceStub` will fail. For 
release builds the effect is simply a `cudaErrorInvalidDeviceFunction` error at 
run time. The reason for this is that the mangled names of the device stub and 
the actual device side function differ: The stub is called 
`_Z8run_thisIZ4mainE3$_1EvT_`, while the device function is 
`_Z8run_thisIZ4mainE3$_0EvT_`. The difference comes down to the anonymous 
struct ID that is maintained and assigned by the `ManglerContext`. It appears 
that for the latter `getAnonymousStructId` is never called for the moved no-op 
lambda, resulting in an ID of 0 for the kernel.

My proposed solution would be to simply share the `ManglerContext` used by the 
`CGNVCUDARuntime` and `CGCXXABI` code generators. For this I've added a new 
`ASTContext::getSharedMangleContext` function that memoizes created manglers 
for the given target ABI. From looking at `ManglerContext` to me at least it 
doesn't look like that could cause any issues, but then again, I really don't 
know much about Clang's internals.

Of course an alternative solution could be to make sure that 
`getAnonymousStructId` is always called for both lambdas (and in the correct 
order), but again I don't really know why that is not happening in the first 
place.


Repository:
  rC Clang

https://reviews.llvm.org/D64015

Files:
  include/clang/AST/ASTContext.h
  lib/AST/ASTContext.cpp
  lib/CodeGen/CGCUDANV.cpp
  lib/CodeGen/CGCXXABI.h

Index: lib/CodeGen/CGCXXABI.h
===================================================================
--- lib/CodeGen/CGCXXABI.h
+++ lib/CodeGen/CGCXXABI.h
@@ -43,10 +43,10 @@
 class CGCXXABI {
 protected:
   CodeGenModule &CGM;
-  std::unique_ptr<MangleContext> MangleCtx;
+  std::shared_ptr<MangleContext> MangleCtx;
 
   CGCXXABI(CodeGenModule &CGM)
-    : CGM(CGM), MangleCtx(CGM.getContext().createMangleContext()) {}
+    : CGM(CGM), MangleCtx(CGM.getContext().getSharedMangleContext()) {}
 
 protected:
   ImplicitParamDecl *getThisDecl(CodeGenFunction &CGF) {
Index: lib/CodeGen/CGCUDANV.cpp
===================================================================
--- lib/CodeGen/CGCUDANV.cpp
+++ lib/CodeGen/CGCUDANV.cpp
@@ -60,7 +60,7 @@
   /// Whether we generate relocatable device code.
   bool RelocatableDeviceCode;
   /// Mangle context for device.
-  std::unique_ptr<MangleContext> DeviceMC;
+  std::shared_ptr<MangleContext> DeviceMC;
 
   llvm::FunctionCallee getSetupArgumentFn() const;
   llvm::FunctionCallee getLaunchFn() const;
@@ -154,7 +154,7 @@
     : CGCUDARuntime(CGM), Context(CGM.getLLVMContext()),
       TheModule(CGM.getModule()),
       RelocatableDeviceCode(CGM.getLangOpts().GPURelocatableDeviceCode),
-      DeviceMC(CGM.getContext().createMangleContext(
+      DeviceMC(CGM.getContext().getSharedMangleContext(
           CGM.getContext().getAuxTargetInfo())) {
   CodeGen::CodeGenTypes &Types = CGM.getTypes();
   ASTContext &Ctx = CGM.getContext();
Index: lib/AST/ASTContext.cpp
===================================================================
--- lib/AST/ASTContext.cpp
+++ lib/AST/ASTContext.cpp
@@ -10048,10 +10048,8 @@
   return VTContext.get();
 }
 
-MangleContext *ASTContext::createMangleContext(const TargetInfo *T) {
-  if (!T)
-    T = Target;
-  switch (T->getCXXABI().getKind()) {
+MangleContext::ManglerKind getManglerKindForABI(TargetCXXABI::Kind K) {
+  switch (K) {
   case TargetCXXABI::GenericAArch64:
   case TargetCXXABI::GenericItanium:
   case TargetCXXABI::GenericARM:
@@ -10060,13 +10058,37 @@
   case TargetCXXABI::iOS64:
   case TargetCXXABI::WebAssembly:
   case TargetCXXABI::WatchOS:
-    return ItaniumMangleContext::create(*this, getDiagnostics());
+    return MangleContext::MK_Itanium;
   case TargetCXXABI::Microsoft:
-    return MicrosoftMangleContext::create(*this, getDiagnostics());
+    return MangleContext::MK_Microsoft;
   }
   llvm_unreachable("Unsupported ABI");
 }
 
+MangleContext *ASTContext::createMangleContext(const TargetInfo *T) {
+  if (!T)
+    T = Target;
+  switch (getManglerKindForABI(T->getCXXABI().getKind())) {
+  case MangleContext::MK_Itanium:
+    return ItaniumMangleContext::create(*this, getDiagnostics());
+  case MangleContext::MK_Microsoft:
+    return MicrosoftMangleContext::create(*this, getDiagnostics());
+  }
+  llvm_unreachable("Unsupported MangleContext");
+}
+
+std::shared_ptr<MangleContext>&
+ASTContext::getSharedMangleContext(const TargetInfo* T) {
+  if (!T)
+    T = Target;
+  auto Kind = getManglerKindForABI(T->getCXXABI().getKind());
+  auto I = SharedMangleContexts.find(Kind);
+  if (I == SharedMangleContexts.end())
+    I = SharedMangleContexts.insert(
+      {Kind, std::shared_ptr<MangleContext>(createMangleContext(T))}).first;
+  return I->second;
+}
+
 CXXABI::~CXXABI() = default;
 
 size_t ASTContext::getSideTableAllocatedMemory() const {
Index: include/clang/AST/ASTContext.h
===================================================================
--- include/clang/AST/ASTContext.h
+++ include/clang/AST/ASTContext.h
@@ -24,6 +24,7 @@
 #include "clang/AST/DeclarationName.h"
 #include "clang/AST/Expr.h"
 #include "clang/AST/ExternalASTSource.h"
+#include "clang/AST/Mangle.h"
 #include "clang/AST/NestedNameSpecifier.h"
 #include "clang/AST/PrettyPrinter.h"
 #include "clang/AST/RawCommentList.h"
@@ -96,7 +97,6 @@
 class DiagnosticsEngine;
 class Expr;
 class FixedPointSemantics;
-class MangleContext;
 class MangleNumberingContext;
 class MaterializeTemporaryExpr;
 class MemberSpecializationInfo;
@@ -2254,6 +2254,15 @@
   /// If \p T is null pointer, assume the target in ASTContext.
   MangleContext *createMangleContext(const TargetInfo *T = nullptr);
 
+private:
+  llvm::DenseMap<std::underlying_type<MangleContext::ManglerKind>::type,
+    std::shared_ptr<MangleContext>> SharedMangleContexts;
+
+public:
+  /// If \p T is null pointer, assume the target in ASTContext.
+  std::shared_ptr<MangleContext>
+  &getSharedMangleContext(const TargetInfo *T = nullptr);
+
   void DeepCollectObjCIvars(const ObjCInterfaceDecl *OI, bool leafClass,
                             SmallVectorImpl<const ObjCIvarDecl*> &Ivars) const;
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to