psalz created this revision. psalz added reviewers: hliao, tra, aheejin. Herald added a project: clang. Herald added a subscriber: cfe-commits.
NOTE: This is a work in progress and mainly intended to highlight the issue - i.e., I'm not certain the provided solution is appropriate. Given this CUDA program template<typename Lambda> __global__ void run_this(Lambda lambda) { lambda(); } template<typename T> struct remove_reference { using type = T; }; template<typename T> struct remove_reference<T&> { using type = T; }; template<typename T> constexpr typename remove_reference<T>::type&& move(T&& t) { return static_cast<typename remove_reference<T>::type&&>(t); } int main() { auto foo = move([](){}); run_this<<<1, 1, 1>>>([]() __device__ { printf("Hello World\n"); }); return 0; } the assertion at the top of `CGNVCUDARuntime::emitDeviceStub` will fail. For release builds the effect is simply a `cudaErrorInvalidDeviceFunction` error at run time. The reason for this is that the mangled names of the device stub and the actual device side function differ: The stub is called `_Z8run_thisIZ4mainE3$_1EvT_`, while the device function is `_Z8run_thisIZ4mainE3$_0EvT_`. The difference comes down to the anonymous struct ID that is maintained and assigned by the `ManglerContext`. It appears that for the latter `getAnonymousStructId` is never called for the moved no-op lambda, resulting in an ID of 0 for the kernel. My proposed solution would be to simply share the `ManglerContext` used by the `CGNVCUDARuntime` and `CGCXXABI` code generators. For this I've added a new `ASTContext::getSharedMangleContext` function that memoizes created manglers for the given target ABI. From looking at `ManglerContext` to me at least it doesn't look like that could cause any issues, but then again, I really don't know much about Clang's internals. Of course an alternative solution could be to make sure that `getAnonymousStructId` is always called for both lambdas (and in the correct order), but again I don't really know why that is not happening in the first place. Repository: rC Clang https://reviews.llvm.org/D64015 Files: include/clang/AST/ASTContext.h lib/AST/ASTContext.cpp lib/CodeGen/CGCUDANV.cpp lib/CodeGen/CGCXXABI.h
Index: lib/CodeGen/CGCXXABI.h =================================================================== --- lib/CodeGen/CGCXXABI.h +++ lib/CodeGen/CGCXXABI.h @@ -43,10 +43,10 @@ class CGCXXABI { protected: CodeGenModule &CGM; - std::unique_ptr<MangleContext> MangleCtx; + std::shared_ptr<MangleContext> MangleCtx; CGCXXABI(CodeGenModule &CGM) - : CGM(CGM), MangleCtx(CGM.getContext().createMangleContext()) {} + : CGM(CGM), MangleCtx(CGM.getContext().getSharedMangleContext()) {} protected: ImplicitParamDecl *getThisDecl(CodeGenFunction &CGF) { Index: lib/CodeGen/CGCUDANV.cpp =================================================================== --- lib/CodeGen/CGCUDANV.cpp +++ lib/CodeGen/CGCUDANV.cpp @@ -60,7 +60,7 @@ /// Whether we generate relocatable device code. bool RelocatableDeviceCode; /// Mangle context for device. - std::unique_ptr<MangleContext> DeviceMC; + std::shared_ptr<MangleContext> DeviceMC; llvm::FunctionCallee getSetupArgumentFn() const; llvm::FunctionCallee getLaunchFn() const; @@ -154,7 +154,7 @@ : CGCUDARuntime(CGM), Context(CGM.getLLVMContext()), TheModule(CGM.getModule()), RelocatableDeviceCode(CGM.getLangOpts().GPURelocatableDeviceCode), - DeviceMC(CGM.getContext().createMangleContext( + DeviceMC(CGM.getContext().getSharedMangleContext( CGM.getContext().getAuxTargetInfo())) { CodeGen::CodeGenTypes &Types = CGM.getTypes(); ASTContext &Ctx = CGM.getContext(); Index: lib/AST/ASTContext.cpp =================================================================== --- lib/AST/ASTContext.cpp +++ lib/AST/ASTContext.cpp @@ -10048,10 +10048,8 @@ return VTContext.get(); } -MangleContext *ASTContext::createMangleContext(const TargetInfo *T) { - if (!T) - T = Target; - switch (T->getCXXABI().getKind()) { +MangleContext::ManglerKind getManglerKindForABI(TargetCXXABI::Kind K) { + switch (K) { case TargetCXXABI::GenericAArch64: case TargetCXXABI::GenericItanium: case TargetCXXABI::GenericARM: @@ -10060,13 +10058,37 @@ case TargetCXXABI::iOS64: case TargetCXXABI::WebAssembly: case TargetCXXABI::WatchOS: - return ItaniumMangleContext::create(*this, getDiagnostics()); + return MangleContext::MK_Itanium; case TargetCXXABI::Microsoft: - return MicrosoftMangleContext::create(*this, getDiagnostics()); + return MangleContext::MK_Microsoft; } llvm_unreachable("Unsupported ABI"); } +MangleContext *ASTContext::createMangleContext(const TargetInfo *T) { + if (!T) + T = Target; + switch (getManglerKindForABI(T->getCXXABI().getKind())) { + case MangleContext::MK_Itanium: + return ItaniumMangleContext::create(*this, getDiagnostics()); + case MangleContext::MK_Microsoft: + return MicrosoftMangleContext::create(*this, getDiagnostics()); + } + llvm_unreachable("Unsupported MangleContext"); +} + +std::shared_ptr<MangleContext>& +ASTContext::getSharedMangleContext(const TargetInfo* T) { + if (!T) + T = Target; + auto Kind = getManglerKindForABI(T->getCXXABI().getKind()); + auto I = SharedMangleContexts.find(Kind); + if (I == SharedMangleContexts.end()) + I = SharedMangleContexts.insert( + {Kind, std::shared_ptr<MangleContext>(createMangleContext(T))}).first; + return I->second; +} + CXXABI::~CXXABI() = default; size_t ASTContext::getSideTableAllocatedMemory() const { Index: include/clang/AST/ASTContext.h =================================================================== --- include/clang/AST/ASTContext.h +++ include/clang/AST/ASTContext.h @@ -24,6 +24,7 @@ #include "clang/AST/DeclarationName.h" #include "clang/AST/Expr.h" #include "clang/AST/ExternalASTSource.h" +#include "clang/AST/Mangle.h" #include "clang/AST/NestedNameSpecifier.h" #include "clang/AST/PrettyPrinter.h" #include "clang/AST/RawCommentList.h" @@ -96,7 +97,6 @@ class DiagnosticsEngine; class Expr; class FixedPointSemantics; -class MangleContext; class MangleNumberingContext; class MaterializeTemporaryExpr; class MemberSpecializationInfo; @@ -2254,6 +2254,15 @@ /// If \p T is null pointer, assume the target in ASTContext. MangleContext *createMangleContext(const TargetInfo *T = nullptr); +private: + llvm::DenseMap<std::underlying_type<MangleContext::ManglerKind>::type, + std::shared_ptr<MangleContext>> SharedMangleContexts; + +public: + /// If \p T is null pointer, assume the target in ASTContext. + std::shared_ptr<MangleContext> + &getSharedMangleContext(const TargetInfo *T = nullptr); + void DeepCollectObjCIvars(const ObjCInterfaceDecl *OI, bool leafClass, SmallVectorImpl<const ObjCIvarDecl*> &Ivars) const;
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits